TracyTank committed (verified)
Commit d188131 · Parent(s): c91f9d3

Training in progress, step 163, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b23c3ed9ca3d367f819e5e0f976976bb29715d12f77fa36cfba3fd2762ee1e5a
+oid sha256:f18043fdd8c36ac71fdd58bde02479000547faeb16a8aa60df2df3061f7267aa
 size 113284112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a49d8e9467d8a819ee53f6d9a03675427ed30a69bb7eb5e261c6723ffb9ff15d
+oid sha256:0cf2b7d5f1432e60a03110a2e17c4d730a72fe828144d9475bede562ac06c4fd
 size 226735226
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd1ec9fb851d792b3ae9928a38b2567f210c5b94d339efe990abf35259dbcb2e
+oid sha256:0bf343d44a04013b3ab623bb04d8748cf1fb1296f3a24c7edc32c421137ed54e
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5cf844ee36bd05503fd0e310aa60493ab65687b1b06ef091d72b007ed27e51cc
+oid sha256:71d733608e7147affc4bb257789d4d3ee6847974683a496b513031a205dfc8fe
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8db00be51153759083358211ad6f93250d612f1c9e4b3b3ec9316cf4b3c92302
+oid sha256:fd00f90a0abf48d91437e86f937449b61287de4595369a15b076a69313313fb3
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89f6396d021957a0847ecc2baee755a89d2dcebe7d8a2ae38471a216faa3f28d
+oid sha256:308922c9791cf368d171ea9bfe4696c89d23a5f6ee2eb4c9f898354b111c66f8
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b780cd0b27f7b98fad14003d4e9569b153f787a5a1a687c28143f19bea8e8a3b
+oid sha256:d9584b990eb1d762133ca744b2d560c00d1b8e953f0c718d599b13b400bf125a
 size 1064
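
All of the checkpoint files above are stored through Git LFS, so the diff only swaps the pointer (the sha256 oid and byte size of the new blob) rather than showing inline content. As an illustrative aside, a downloaded object can be checked against the pointer values shown above; the sketch below is a hypothetical helper (matches_pointer is not part of this repository) using only the Python standard library.

import hashlib

def matches_pointer(path, expected_oid, expected_size):
    """Check a downloaded LFS object against the oid/size from its pointer file."""
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# Values taken from the new adapter_model.safetensors pointer in this commit.
ok = matches_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "f18043fdd8c36ac71fdd58bde02479000547faeb16a8aa60df2df3061f7267aa",
    113284112,
)
print("adapter checksum ok:", ok)
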
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.625535011291504,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 3.7147806004618937,
+  "epoch": 4.041570438799076,
   "eval_steps": 25,
-  "global_step": 150,
+  "global_step": 163,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,97 @@
       "eval_samples_per_second": 76.899,
       "eval_steps_per_second": 19.994,
       "step": 150
+    },
+    {
+      "epoch": 3.7394149345650503,
+      "grad_norm": 1.3795236349105835,
+      "learning_rate": 1.1324461613022928e-05,
+      "loss": 0.5866,
+      "step": 151
+    },
+    {
+      "epoch": 3.7640492686682063,
+      "grad_norm": 1.1751108169555664,
+      "learning_rate": 1.1113793087267173e-05,
+      "loss": 2.1343,
+      "step": 152
+    },
+    {
+      "epoch": 3.7886836027713624,
+      "grad_norm": 1.213850736618042,
+      "learning_rate": 1.092115264363775e-05,
+      "loss": 2.0515,
+      "step": 153
+    },
+    {
+      "epoch": 3.813317936874519,
+      "grad_norm": 1.2193846702575684,
+      "learning_rate": 1.0746619417197435e-05,
+      "loss": 1.9269,
+      "step": 154
+    },
+    {
+      "epoch": 3.837952270977675,
+      "grad_norm": 1.266520619392395,
+      "learning_rate": 1.0590265104717518e-05,
+      "loss": 1.8284,
+      "step": 155
+    },
+    {
+      "epoch": 3.8625866050808315,
+      "grad_norm": 1.2946449518203735,
+      "learning_rate": 1.045215393522539e-05,
+      "loss": 1.6426,
+      "step": 156
+    },
+    {
+      "epoch": 3.8872209391839876,
+      "grad_norm": 1.5318657159805298,
+      "learning_rate": 1.0332342643619824e-05,
+      "loss": 1.7901,
+      "step": 157
+    },
+    {
+      "epoch": 3.911855273287144,
+      "grad_norm": 1.4870713949203491,
+      "learning_rate": 1.023088044736472e-05,
+      "loss": 1.6923,
+      "step": 158
+    },
+    {
+      "epoch": 3.9364896073903,
+      "grad_norm": 1.6788103580474854,
+      "learning_rate": 1.0147809026271016e-05,
+      "loss": 1.5306,
+      "step": 159
+    },
+    {
+      "epoch": 3.9611239414934563,
+      "grad_norm": 1.869388461112976,
+      "learning_rate": 1.0083162505374962e-05,
+      "loss": 1.43,
+      "step": 160
+    },
+    {
+      "epoch": 3.9857582755966128,
+      "grad_norm": 1.462508201599121,
+      "learning_rate": 1.0036967440919847e-05,
+      "loss": 0.7244,
+      "step": 161
+    },
+    {
+      "epoch": 4.01693610469592,
+      "grad_norm": 2.254340648651123,
+      "learning_rate": 1.0009242809446925e-05,
+      "loss": 3.4942,
+      "step": 162
+    },
+    {
+      "epoch": 4.041570438799076,
+      "grad_norm": 1.1474640369415283,
+      "learning_rate": 1e-05,
+      "loss": 1.8964,
+      "step": 163
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1227,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.331051303501824e+17,
+  "total_flos": 6.879742416471982e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null