pilotj committed on
Commit
6204dc8
·
verified ·
1 Parent(s): b6e23b3

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "pilotj/roberta-base-v1",
3
  "architectures": [
4
  "RobertaForSequenceClassification"
5
  ],
@@ -77,7 +77,7 @@
77
  "position_embedding_type": "absolute",
78
  "problem_type": "single_label_classification",
79
  "torch_dtype": "float32",
80
- "transformers_version": "4.44.2",
81
  "type_vocab_size": 1,
82
  "use_cache": true,
83
  "vocab_size": 50265
 
1
  {
2
+ "_name_or_path": "pilotj/roberta-base-pretrained-v1",
3
  "architectures": [
4
  "RobertaForSequenceClassification"
5
  ],
 
77
  "position_embedding_type": "absolute",
78
  "problem_type": "single_label_classification",
79
  "torch_dtype": "float32",
80
+ "transformers_version": "4.45.1",
81
  "type_vocab_size": 1,
82
  "use_cache": true,
83
  "vocab_size": 50265
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9900b3c186c079bcb23fca4d7455eb1c1fa4a6834aa080210c16cf89ed37cf6a
3
  size 498686648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb47bfaa1236115f61567fe8fdfd755a48f14a81fe810bfa5b05f0456856537
3
  size 498686648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79da3e9caf7d4d2e6f1dcc2334761728f64c3e8f0ede33e99992bae602b52fc1
3
- size 997493050
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f815fc74f6bd8a68f85c1a97196329bbeb31b2a1aab556c287e80f3e106ef1f2
3
+ size 997493114
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:721bc7a5b221d6b012d2388a6a09216865e2462e31bd2e0fe77aaddae689aeb6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ba896bfc67506377d370f41aae67965419a152a2bf120ebf3532f5747c268a3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e27a97977fcc445cdd5c99153117efc644690cb82cdc8080515d4ff3a8dec66
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:570b944acc1715de82f5f251df6b944775e9ac9603b83fb9b4e3b4e43503d7f8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,239 +1,59 @@
1
  {
2
- "best_metric": 0.4090208411216736,
3
- "best_model_checkpoint": "results/checkpoint-5000",
4
- "epoch": 1.0429506020669386,
5
  "eval_steps": 500,
6
- "global_step": 5500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0948136910969944,
13
- "grad_norm": 273121.5,
14
- "learning_rate": 1.905177318414565e-05,
15
- "loss": 0.4026,
16
  "step": 500
17
  },
18
  {
19
- "epoch": 0.0948136910969944,
20
- "eval_accuracy": 0.8695388065978226,
21
- "eval_f1_macro": 0.8390046601833578,
22
- "eval_f1_w": 0.8708233733420756,
23
- "eval_loss": 0.471578985452652,
24
- "eval_precision": 0.8742116654929993,
25
- "eval_recall": 0.8695388065978226,
26
- "eval_runtime": 376.2807,
27
- "eval_samples_per_second": 56.875,
28
- "eval_steps_per_second": 0.446,
29
  "step": 500
30
  },
31
  {
32
- "epoch": 0.1896273821939888,
33
- "grad_norm": 298776.75,
34
- "learning_rate": 1.8103546368291297e-05,
35
- "loss": 0.3968,
36
  "step": 1000
37
  },
38
  {
39
- "epoch": 0.1896273821939888,
40
- "eval_accuracy": 0.8686977244054016,
41
- "eval_f1_macro": 0.8346677335711247,
42
- "eval_f1_w": 0.8703092882080509,
43
- "eval_loss": 0.45676785707473755,
44
- "eval_precision": 0.8751669722412856,
45
- "eval_recall": 0.8686977244054016,
46
- "eval_runtime": 375.4216,
47
- "eval_samples_per_second": 57.005,
48
- "eval_steps_per_second": 0.447,
49
  "step": 1000
50
- },
51
- {
52
- "epoch": 0.2844410732909832,
53
- "grad_norm": 330839.46875,
54
- "learning_rate": 1.7155319552436945e-05,
55
- "loss": 0.3614,
56
- "step": 1500
57
- },
58
- {
59
- "epoch": 0.2844410732909832,
60
- "eval_accuracy": 0.8692117190785478,
61
- "eval_f1_macro": 0.8366798796528604,
62
- "eval_f1_w": 0.8714368286444473,
63
- "eval_loss": 0.4663577973842621,
64
- "eval_precision": 0.8769017804493384,
65
- "eval_recall": 0.8692117190785478,
66
- "eval_runtime": 369.6202,
67
- "eval_samples_per_second": 57.9,
68
- "eval_steps_per_second": 0.455,
69
- "step": 1500
70
- },
71
- {
72
- "epoch": 0.3792547643879776,
73
- "grad_norm": 351888.96875,
74
- "learning_rate": 1.6207092736582593e-05,
75
- "loss": 0.3413,
76
- "step": 2000
77
- },
78
- {
79
- "epoch": 0.3792547643879776,
80
- "eval_accuracy": 0.8746320265408158,
81
- "eval_f1_macro": 0.8421666352401839,
82
- "eval_f1_w": 0.8757671689389476,
83
- "eval_loss": 0.4543912708759308,
84
- "eval_precision": 0.8788228792310168,
85
- "eval_recall": 0.8746320265408158,
86
- "eval_runtime": 369.4569,
87
- "eval_samples_per_second": 57.926,
88
- "eval_steps_per_second": 0.455,
89
- "step": 2000
90
- },
91
- {
92
- "epoch": 0.474068455484972,
93
- "grad_norm": 248588.671875,
94
- "learning_rate": 1.5258865920728237e-05,
95
- "loss": 0.3377,
96
- "step": 2500
97
- },
98
- {
99
- "epoch": 0.474068455484972,
100
- "eval_accuracy": 0.8697724405401617,
101
- "eval_f1_macro": 0.8382571121374275,
102
- "eval_f1_w": 0.8717700909838132,
103
- "eval_loss": 0.4674856960773468,
104
- "eval_precision": 0.8768311756042306,
105
- "eval_recall": 0.8697724405401617,
106
- "eval_runtime": 369.663,
107
- "eval_samples_per_second": 57.893,
108
- "eval_steps_per_second": 0.454,
109
- "step": 2500
110
- },
111
- {
112
- "epoch": 0.5688821465819665,
113
- "grad_norm": 296608.8125,
114
- "learning_rate": 1.4310639104873887e-05,
115
- "loss": 0.4106,
116
- "step": 3000
117
- },
118
- {
119
- "epoch": 0.5688821465819665,
120
- "eval_accuracy": 0.8773889070604177,
121
- "eval_f1_macro": 0.84599701470003,
122
- "eval_f1_w": 0.8787421942102768,
123
- "eval_loss": 0.42805689573287964,
124
- "eval_precision": 0.8819775414007026,
125
- "eval_recall": 0.8773889070604177,
126
- "eval_runtime": 369.0758,
127
- "eval_samples_per_second": 57.985,
128
- "eval_steps_per_second": 0.455,
129
- "step": 3000
130
- },
131
- {
132
- "epoch": 0.6636958376789608,
133
- "grad_norm": 360671.71875,
134
- "learning_rate": 1.3362412289019534e-05,
135
- "loss": 0.4845,
136
- "step": 3500
137
- },
138
- {
139
- "epoch": 0.6636958376789608,
140
- "eval_accuracy": 0.8777627213681604,
141
- "eval_f1_macro": 0.8440419694609644,
142
- "eval_f1_w": 0.8784727825739984,
143
- "eval_loss": 0.4156029224395752,
144
- "eval_precision": 0.8807763829362149,
145
- "eval_recall": 0.8777627213681604,
146
- "eval_runtime": 369.3183,
147
- "eval_samples_per_second": 57.947,
148
- "eval_steps_per_second": 0.455,
149
- "step": 3500
150
- },
151
- {
152
- "epoch": 0.7585095287759552,
153
- "grad_norm": 292012.375,
154
- "learning_rate": 1.241418547316518e-05,
155
- "loss": 0.4711,
156
- "step": 4000
157
- },
158
- {
159
- "epoch": 0.7585095287759552,
160
- "eval_accuracy": 0.8772487266950143,
161
- "eval_f1_macro": 0.8459498008507077,
162
- "eval_f1_w": 0.878978287181103,
163
- "eval_loss": 0.42315369844436646,
164
- "eval_precision": 0.8830933782031678,
165
- "eval_recall": 0.8772487266950143,
166
- "eval_runtime": 369.6327,
167
- "eval_samples_per_second": 57.898,
168
- "eval_steps_per_second": 0.455,
169
- "step": 4000
170
- },
171
- {
172
- "epoch": 0.8533232198729497,
173
- "grad_norm": 553655.25,
174
- "learning_rate": 1.146595865731083e-05,
175
- "loss": 0.4648,
176
- "step": 4500
177
- },
178
- {
179
- "epoch": 0.8533232198729497,
180
- "eval_accuracy": 0.876501098079529,
181
- "eval_f1_macro": 0.8445177150549945,
182
- "eval_f1_w": 0.8782560075605943,
183
- "eval_loss": 0.4161696434020996,
184
- "eval_precision": 0.8828712896671246,
185
- "eval_recall": 0.876501098079529,
186
- "eval_runtime": 376.9735,
187
- "eval_samples_per_second": 56.771,
188
- "eval_steps_per_second": 0.446,
189
- "step": 4500
190
- },
191
- {
192
- "epoch": 0.948136910969944,
193
- "grad_norm": 312566.46875,
194
- "learning_rate": 1.0517731841456478e-05,
195
- "loss": 0.4634,
196
- "step": 5000
197
- },
198
- {
199
- "epoch": 0.948136910969944,
200
- "eval_accuracy": 0.8801925143684874,
201
- "eval_f1_macro": 0.8492634513992154,
202
- "eval_f1_w": 0.8814973283712985,
203
- "eval_loss": 0.4090208411216736,
204
- "eval_precision": 0.8851875251536223,
205
- "eval_recall": 0.8801925143684874,
206
- "eval_runtime": 373.6216,
207
- "eval_samples_per_second": 57.28,
208
- "eval_steps_per_second": 0.45,
209
- "step": 5000
210
- },
211
- {
212
- "epoch": 1.0429506020669386,
213
- "grad_norm": 298623.21875,
214
- "learning_rate": 9.569505025602126e-06,
215
- "loss": 0.4332,
216
- "step": 5500
217
- },
218
- {
219
- "epoch": 1.0429506020669386,
220
- "eval_accuracy": 0.8806597822531658,
221
- "eval_f1_macro": 0.8501793427912383,
222
- "eval_f1_w": 0.8820343950051113,
223
- "eval_loss": 0.4112658202648163,
224
- "eval_precision": 0.8853039418950968,
225
- "eval_recall": 0.8806597822531658,
226
- "eval_runtime": 377.6622,
227
- "eval_samples_per_second": 56.667,
228
- "eval_steps_per_second": 0.445,
229
- "step": 5500
230
  }
231
  ],
232
  "logging_steps": 500,
233
- "max_steps": 10546,
234
  "num_input_tokens_seen": 0,
235
  "num_train_epochs": 2,
236
- "save_steps": 500,
237
  "stateful_callbacks": {
238
  "TrainerControl": {
239
  "args": {
@@ -246,8 +66,8 @@
246
  "attributes": {}
247
  }
248
  },
249
- "total_flos": 1.8526799220115046e+17,
250
- "train_batch_size": 32,
251
  "trial_name": null,
252
  "trial_params": null
253
  }
 
1
  {
2
+ "best_metric": 0.4097191095352173,
3
+ "best_model_checkpoint": "results/checkpoint-1000",
4
+ "epoch": 0.37921880925293894,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.18960940462646947,
13
+ "grad_norm": 3.0855796337127686,
14
+ "learning_rate": 1.8103905953735305e-05,
15
+ "loss": 0.3932,
16
  "step": 500
17
  },
18
  {
19
+ "epoch": 0.18960940462646947,
20
+ "eval_accuracy": 0.8802859679454231,
21
+ "eval_f1_macro": 0.8504927369674822,
22
+ "eval_f1_w": 0.8815894117039326,
23
+ "eval_loss": 0.41382548213005066,
24
+ "eval_precision": 0.8847207792383885,
25
+ "eval_recall": 0.8802859679454231,
26
+ "eval_runtime": 26.8685,
27
+ "eval_samples_per_second": 796.51,
28
+ "eval_steps_per_second": 12.468,
29
  "step": 500
30
  },
31
  {
32
+ "epoch": 0.37921880925293894,
33
+ "grad_norm": 3.5775933265686035,
34
+ "learning_rate": 1.6207811907470613e-05,
35
+ "loss": 0.3997,
36
  "step": 1000
37
  },
38
  {
39
+ "epoch": 0.37921880925293894,
40
+ "eval_accuracy": 0.8809401429839727,
41
+ "eval_f1_macro": 0.8499475297827489,
42
+ "eval_f1_w": 0.8824416398671949,
43
+ "eval_loss": 0.4097191095352173,
44
+ "eval_precision": 0.8860689951754195,
45
+ "eval_recall": 0.8809401429839727,
46
+ "eval_runtime": 26.7804,
47
+ "eval_samples_per_second": 799.129,
48
+ "eval_steps_per_second": 12.509,
49
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "logging_steps": 500,
53
+ "max_steps": 5274,
54
  "num_input_tokens_seen": 0,
55
  "num_train_epochs": 2,
56
+ "save_steps": 1000,
57
  "stateful_callbacks": {
58
  "TrainerControl": {
59
  "args": {
 
66
  "attributes": {}
67
  }
68
  },
69
+ "total_flos": 6.7370944561152e+16,
70
+ "train_batch_size": 128,
71
  "trial_name": null,
72
  "trial_params": null
73
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e5add9cc612ced3da535c4ff1875848067ed7f560d9dea79e9817af3f61331a
3
- size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d2ac5ef996fdddaed4945f20a88f7c048db0d5270b6ecce4dd956dcf5aae1d5
3
+ size 5240