t1msan committed on
Commit
56bd729
1 Parent(s): 909b2ae

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.21,
3
+ "eval_loss": 0.0002610796655062586,
4
+ "eval_runtime": 4.4046,
5
+ "eval_samples_per_second": 30.196,
6
+ "eval_steps_per_second": 1.135,
7
+ "total_flos": 3.9447179555061105e+18,
8
+ "train_loss": 0.08187244446534249,
9
+ "train_runtime": 1269.7055,
10
+ "train_samples_per_second": 14.106,
11
+ "train_steps_per_second": 0.106
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.21,
3
+ "eval_loss": 0.0002610796655062586,
4
+ "eval_runtime": 4.4046,
5
+ "eval_samples_per_second": 30.196,
6
+ "eval_steps_per_second": 1.135
7
+ }
runs/Apr23_19-10-48_abf93a4c4e7c/events.out.tfevents.1713900955.abf93a4c4e7c.34.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c4b7e0d5345788759199d441832a468b33b67a3bb412a8bb343d3868d5f916e
3
+ size 630
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.21,
3
+ "total_flos": 3.9447179555061105e+18,
4
+ "train_loss": 0.08187244446534249,
5
+ "train_runtime": 1269.7055,
6
+ "train_samples_per_second": 14.106,
7
+ "train_steps_per_second": 0.106
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.0002610796655062586,
3
+ "best_model_checkpoint": "convnext-base-384-22k-1k-Kontur-competition-1.3K/checkpoint-135",
4
+ "epoch": 14.210526315789474,
5
+ "eval_steps": 500,
6
+ "global_step": 135,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.95,
13
+ "eval_loss": 0.5273232460021973,
14
+ "eval_runtime": 5.3966,
15
+ "eval_samples_per_second": 24.645,
16
+ "eval_steps_per_second": 0.927,
17
+ "step": 9
18
+ },
19
+ {
20
+ "epoch": 1.05,
21
+ "grad_norm": 14.016679763793945,
22
+ "learning_rate": 3.571428571428572e-05,
23
+ "loss": 0.6611,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "eval_loss": 0.15177780389785767,
29
+ "eval_runtime": 4.4294,
30
+ "eval_samples_per_second": 30.027,
31
+ "eval_steps_per_second": 1.129,
32
+ "step": 19
33
+ },
34
+ {
35
+ "epoch": 2.11,
36
+ "grad_norm": 3.7719967365264893,
37
+ "learning_rate": 4.75206611570248e-05,
38
+ "loss": 0.2686,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 2.95,
43
+ "eval_loss": 0.026649044826626778,
44
+ "eval_runtime": 4.3155,
45
+ "eval_samples_per_second": 30.819,
46
+ "eval_steps_per_second": 1.159,
47
+ "step": 28
48
+ },
49
+ {
50
+ "epoch": 3.16,
51
+ "grad_norm": 9.589371681213379,
52
+ "learning_rate": 4.338842975206612e-05,
53
+ "loss": 0.0899,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 4.0,
58
+ "eval_loss": 0.00659002223983407,
59
+ "eval_runtime": 4.3662,
60
+ "eval_samples_per_second": 30.461,
61
+ "eval_steps_per_second": 1.145,
62
+ "step": 38
63
+ },
64
+ {
65
+ "epoch": 4.21,
66
+ "grad_norm": 24.757675170898438,
67
+ "learning_rate": 3.925619834710744e-05,
68
+ "loss": 0.0379,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 4.95,
73
+ "eval_loss": 0.002536825370043516,
74
+ "eval_runtime": 4.318,
75
+ "eval_samples_per_second": 30.802,
76
+ "eval_steps_per_second": 1.158,
77
+ "step": 47
78
+ },
79
+ {
80
+ "epoch": 5.26,
81
+ "grad_norm": 0.6816350221633911,
82
+ "learning_rate": 3.512396694214876e-05,
83
+ "loss": 0.0202,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 6.0,
88
+ "eval_loss": 0.0019522847142070532,
89
+ "eval_runtime": 4.4042,
90
+ "eval_samples_per_second": 30.199,
91
+ "eval_steps_per_second": 1.135,
92
+ "step": 57
93
+ },
94
+ {
95
+ "epoch": 6.32,
96
+ "grad_norm": 0.10703104734420776,
97
+ "learning_rate": 3.099173553719008e-05,
98
+ "loss": 0.0048,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 6.95,
103
+ "eval_loss": 0.00098791706841439,
104
+ "eval_runtime": 4.3638,
105
+ "eval_samples_per_second": 30.478,
106
+ "eval_steps_per_second": 1.146,
107
+ "step": 66
108
+ },
109
+ {
110
+ "epoch": 7.37,
111
+ "grad_norm": 4.492315292358398,
112
+ "learning_rate": 2.6859504132231405e-05,
113
+ "loss": 0.0056,
114
+ "step": 70
115
+ },
116
+ {
117
+ "epoch": 8.0,
118
+ "eval_loss": 0.001134931342676282,
119
+ "eval_runtime": 4.3514,
120
+ "eval_samples_per_second": 30.565,
121
+ "eval_steps_per_second": 1.149,
122
+ "step": 76
123
+ },
124
+ {
125
+ "epoch": 8.42,
126
+ "grad_norm": 0.05526771396398544,
127
+ "learning_rate": 2.272727272727273e-05,
128
+ "loss": 0.0011,
129
+ "step": 80
130
+ },
131
+ {
132
+ "epoch": 8.95,
133
+ "eval_loss": 0.0004670162743423134,
134
+ "eval_runtime": 4.379,
135
+ "eval_samples_per_second": 30.372,
136
+ "eval_steps_per_second": 1.142,
137
+ "step": 85
138
+ },
139
+ {
140
+ "epoch": 9.47,
141
+ "grad_norm": 2.763624906539917,
142
+ "learning_rate": 1.859504132231405e-05,
143
+ "loss": 0.0017,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 10.0,
148
+ "eval_loss": 0.0014447210123762488,
149
+ "eval_runtime": 4.3565,
150
+ "eval_samples_per_second": 30.529,
151
+ "eval_steps_per_second": 1.148,
152
+ "step": 95
153
+ },
154
+ {
155
+ "epoch": 10.53,
156
+ "grad_norm": 0.29414886236190796,
157
+ "learning_rate": 1.4462809917355372e-05,
158
+ "loss": 0.0076,
159
+ "step": 100
160
+ },
161
+ {
162
+ "epoch": 10.95,
163
+ "eval_loss": 0.0004167805891484022,
164
+ "eval_runtime": 4.5875,
165
+ "eval_samples_per_second": 28.992,
166
+ "eval_steps_per_second": 1.09,
167
+ "step": 104
168
+ },
169
+ {
170
+ "epoch": 11.58,
171
+ "grad_norm": 0.02740568295121193,
172
+ "learning_rate": 1.0330578512396695e-05,
173
+ "loss": 0.0018,
174
+ "step": 110
175
+ },
176
+ {
177
+ "epoch": 12.0,
178
+ "eval_loss": 0.0003142206114716828,
179
+ "eval_runtime": 4.7253,
180
+ "eval_samples_per_second": 28.146,
181
+ "eval_steps_per_second": 1.058,
182
+ "step": 114
183
+ },
184
+ {
185
+ "epoch": 12.63,
186
+ "grad_norm": 0.06253942847251892,
187
+ "learning_rate": 6.198347107438017e-06,
188
+ "loss": 0.0027,
189
+ "step": 120
190
+ },
191
+ {
192
+ "epoch": 12.95,
193
+ "eval_loss": 0.0002625222550705075,
194
+ "eval_runtime": 4.4194,
195
+ "eval_samples_per_second": 30.094,
196
+ "eval_steps_per_second": 1.131,
197
+ "step": 123
198
+ },
199
+ {
200
+ "epoch": 13.68,
201
+ "grad_norm": 0.3781050741672516,
202
+ "learning_rate": 2.066115702479339e-06,
203
+ "loss": 0.0008,
204
+ "step": 130
205
+ },
206
+ {
207
+ "epoch": 14.0,
208
+ "eval_loss": 0.00026170219643972814,
209
+ "eval_runtime": 4.4435,
210
+ "eval_samples_per_second": 29.931,
211
+ "eval_steps_per_second": 1.125,
212
+ "step": 133
213
+ },
214
+ {
215
+ "epoch": 14.21,
216
+ "eval_loss": 0.0002610796655062586,
217
+ "eval_runtime": 4.4227,
218
+ "eval_samples_per_second": 30.072,
219
+ "eval_steps_per_second": 1.131,
220
+ "step": 135
221
+ },
222
+ {
223
+ "epoch": 14.21,
224
+ "step": 135,
225
+ "total_flos": 3.9447179555061105e+18,
226
+ "train_loss": 0.08187244446534249,
227
+ "train_runtime": 1269.7055,
228
+ "train_samples_per_second": 14.106,
229
+ "train_steps_per_second": 0.106
230
+ }
231
+ ],
232
+ "logging_steps": 10,
233
+ "max_steps": 135,
234
+ "num_input_tokens_seen": 0,
235
+ "num_train_epochs": 15,
236
+ "save_steps": 500,
237
+ "total_flos": 3.9447179555061105e+18,
238
+ "train_batch_size": 32,
239
+ "trial_name": null,
240
+ "trial_params": null
241
+ }