MedAliBH committed
Commit 46c476a
1 Parent(s): 32d39bc

Upload language classifier cnn

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model/model_weights filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ model/logs/*
+ model/training_checkpoints/*
description.json ADDED
@@ -0,0 +1,888 @@
1
+ {
2
+ "command": "/usr/local/bin/ludwig experiment --config /src/language-classifier.yaml --dataset /data/data.json --output_directory /results",
3
+ "compute": {
4
+ "num_nodes": 1
5
+ },
6
+ "config": {
7
+ "backend": null,
8
+ "combiner": {
9
+ "activation": "relu",
10
+ "bias_initializer": "zeros",
11
+ "dropout": 0.0,
12
+ "fc_layers": null,
13
+ "flatten_inputs": false,
14
+ "norm": null,
15
+ "norm_params": null,
16
+ "num_fc_layers": 0,
17
+ "output_size": 256,
18
+ "residual": false,
19
+ "type": "concat",
20
+ "use_bias": true,
21
+ "weights_initializer": "xavier_uniform"
22
+ },
23
+ "defaults": {
24
+ "audio": {
25
+ "encoder": {
26
+ "activation": "relu",
27
+ "bias_initializer": "zeros",
28
+ "conv_layers": null,
29
+ "dropout": 0.0,
30
+ "embedding_size": 256,
31
+ "embeddings_on_cpu": false,
32
+ "embeddings_trainable": true,
33
+ "fc_layers": null,
34
+ "filter_size": 3,
35
+ "max_sequence_length": null,
36
+ "norm": null,
37
+ "norm_params": null,
38
+ "num_conv_layers": null,
39
+ "num_fc_layers": null,
40
+ "num_filters": 256,
41
+ "output_size": 256,
42
+ "pool_function": "max",
43
+ "pool_size": null,
44
+ "pretrained_embeddings": null,
45
+ "reduce_output": "sum",
46
+ "representation": "dense",
47
+ "should_embed": true,
48
+ "skip": false,
49
+ "type": "parallel_cnn",
50
+ "use_bias": true,
51
+ "vocab": null,
52
+ "weights_initializer": "xavier_uniform"
53
+ },
54
+ "preprocessing": {
55
+ "audio_file_length_limit_in_s": 7.5,
56
+ "computed_fill_value": null,
57
+ "fill_value": null,
58
+ "in_memory": true,
59
+ "missing_value_strategy": "bfill",
60
+ "norm": null,
61
+ "num_fft_points": null,
62
+ "num_filter_bands": 80,
63
+ "padding_value": 0.0,
64
+ "type": "fbank",
65
+ "window_length_in_s": 0.04,
66
+ "window_shift_in_s": 0.02,
67
+ "window_type": "hamming"
68
+ }
69
+ },
70
+ "bag": {
71
+ "encoder": {
72
+ "activation": "relu",
73
+ "bias_initializer": "zeros",
74
+ "dropout": 0.0,
75
+ "embedding_size": 50,
76
+ "embeddings_on_cpu": false,
77
+ "embeddings_trainable": true,
78
+ "fc_layers": null,
79
+ "force_embedding_size": false,
80
+ "norm": null,
81
+ "norm_params": null,
82
+ "num_fc_layers": 0,
83
+ "output_size": 10,
84
+ "pretrained_embeddings": null,
85
+ "representation": "dense",
86
+ "skip": false,
87
+ "type": "embed",
88
+ "use_bias": true,
89
+ "vocab": null,
90
+ "weights_initializer": "xavier_uniform"
91
+ },
92
+ "preprocessing": {
93
+ "computed_fill_value": "<UNK>",
94
+ "fill_value": "<UNK>",
95
+ "lowercase": false,
96
+ "missing_value_strategy": "fill_with_const",
97
+ "most_common": 10000,
98
+ "tokenizer": "space"
99
+ }
100
+ },
101
+ "binary": {
102
+ "decoder": {
103
+ "bias_initializer": "zeros",
104
+ "fc_activation": "relu",
105
+ "fc_bias_initializer": "zeros",
106
+ "fc_dropout": 0.0,
107
+ "fc_layers": null,
108
+ "fc_norm": null,
109
+ "fc_norm_params": null,
110
+ "fc_output_size": 256,
111
+ "fc_use_bias": true,
112
+ "fc_weights_initializer": "xavier_uniform",
113
+ "input_size": null,
114
+ "num_fc_layers": 0,
115
+ "type": "regressor",
116
+ "use_bias": true,
117
+ "weights_initializer": "xavier_uniform"
118
+ },
119
+ "encoder": {
120
+ "skip": false,
121
+ "type": "passthrough"
122
+ },
123
+ "loss": {
124
+ "confidence_penalty": 0,
125
+ "positive_class_weight": null,
126
+ "robust_lambda": 0,
127
+ "type": "binary_weighted_cross_entropy",
128
+ "weight": 1.0
129
+ },
130
+ "preprocessing": {
131
+ "computed_fill_value": null,
132
+ "fallback_true_label": null,
133
+ "fill_value": null,
134
+ "missing_value_strategy": "fill_with_false"
135
+ }
136
+ },
137
+ "category": {
138
+ "decoder": {
139
+ "bias_initializer": "zeros",
140
+ "fc_activation": "relu",
141
+ "fc_bias_initializer": "zeros",
142
+ "fc_dropout": 0.0,
143
+ "fc_layers": null,
144
+ "fc_norm": null,
145
+ "fc_norm_params": null,
146
+ "fc_output_size": 256,
147
+ "fc_use_bias": true,
148
+ "fc_weights_initializer": "xavier_uniform",
149
+ "input_size": null,
150
+ "num_classes": null,
151
+ "num_fc_layers": 0,
152
+ "type": "classifier",
153
+ "use_bias": true,
154
+ "weights_initializer": "xavier_uniform"
155
+ },
156
+ "encoder": {
157
+ "dropout": 0.0,
158
+ "embedding_initializer": null,
159
+ "embedding_size": 50,
160
+ "embeddings_on_cpu": false,
161
+ "embeddings_trainable": true,
162
+ "pretrained_embeddings": null,
163
+ "skip": false,
164
+ "type": "dense",
165
+ "vocab": null
166
+ },
167
+ "loss": {
168
+ "class_similarities": null,
169
+ "class_similarities_temperature": 0,
170
+ "class_weights": null,
171
+ "confidence_penalty": 0,
172
+ "robust_lambda": 0,
173
+ "type": "softmax_cross_entropy",
174
+ "weight": 1.0
175
+ },
176
+ "preprocessing": {
177
+ "cache_encoder_embeddings": false,
178
+ "computed_fill_value": "<UNK>",
179
+ "fill_value": "<UNK>",
180
+ "lowercase": false,
181
+ "missing_value_strategy": "fill_with_const",
182
+ "most_common": 10000
183
+ }
184
+ },
185
+ "date": {
186
+ "encoder": {
187
+ "activation": "relu",
188
+ "bias_initializer": "zeros",
189
+ "dropout": 0.0,
190
+ "embedding_size": 10,
191
+ "embeddings_on_cpu": false,
192
+ "fc_layers": null,
193
+ "norm": null,
194
+ "norm_params": null,
195
+ "num_fc_layers": 0,
196
+ "output_size": 10,
197
+ "skip": false,
198
+ "type": "embed",
199
+ "use_bias": true,
200
+ "weights_initializer": "xavier_uniform"
201
+ },
202
+ "preprocessing": {
203
+ "computed_fill_value": "",
204
+ "datetime_format": null,
205
+ "fill_value": "",
206
+ "missing_value_strategy": "fill_with_const"
207
+ }
208
+ },
209
+ "h3": {
210
+ "encoder": {
211
+ "activation": "relu",
212
+ "bias_initializer": "zeros",
213
+ "dropout": 0.0,
214
+ "embedding_size": 10,
215
+ "embeddings_on_cpu": false,
216
+ "fc_layers": null,
217
+ "norm": null,
218
+ "norm_params": null,
219
+ "num_fc_layers": 0,
220
+ "output_size": 10,
221
+ "reduce_output": "sum",
222
+ "skip": false,
223
+ "type": "embed",
224
+ "use_bias": true,
225
+ "weights_initializer": "xavier_uniform"
226
+ },
227
+ "preprocessing": {
228
+ "computed_fill_value": 576495936675512319,
229
+ "fill_value": 576495936675512319,
230
+ "missing_value_strategy": "fill_with_const"
231
+ }
232
+ },
233
+ "image": {
234
+ "augmentation": [],
235
+ "decoder": {
236
+ "conv_norm": "batch",
237
+ "fc_activation": "relu",
238
+ "fc_bias_initializer": "zeros",
239
+ "fc_dropout": 0.0,
240
+ "fc_layers": null,
241
+ "fc_norm": null,
242
+ "fc_norm_params": null,
243
+ "fc_output_size": 256,
244
+ "fc_use_bias": true,
245
+ "fc_weights_initializer": "xavier_uniform",
246
+ "height": null,
247
+ "input_size": 1024,
248
+ "num_channels": null,
249
+ "num_classes": null,
250
+ "num_fc_layers": 0,
251
+ "type": "unet",
252
+ "width": null
253
+ },
254
+ "encoder": {
255
+ "conv_activation": "relu",
256
+ "conv_dropout": 0.0,
257
+ "conv_layers": null,
258
+ "conv_norm": null,
259
+ "conv_norm_params": null,
260
+ "conv_use_bias": true,
261
+ "dilation": 1,
262
+ "fc_activation": "relu",
263
+ "fc_bias_initializer": "zeros",
264
+ "fc_dropout": 0.0,
265
+ "fc_layers": null,
266
+ "fc_norm": null,
267
+ "fc_norm_params": null,
268
+ "fc_use_bias": true,
269
+ "fc_weights_initializer": "xavier_uniform",
270
+ "groups": 1,
271
+ "height": null,
272
+ "kernel_size": 3,
273
+ "num_channels": null,
274
+ "num_conv_layers": null,
275
+ "num_fc_layers": 1,
276
+ "out_channels": 32,
277
+ "output_size": 128,
278
+ "padding": "valid",
279
+ "padding_mode": "zeros",
280
+ "pool_dilation": 1,
281
+ "pool_function": "max",
282
+ "pool_kernel_size": 2,
283
+ "pool_padding": 0,
284
+ "pool_stride": null,
285
+ "skip": false,
286
+ "stride": 1,
287
+ "type": "stacked_cnn",
288
+ "width": null
289
+ },
290
+ "loss": {
291
+ "class_similarities": null,
292
+ "class_similarities_temperature": 0,
293
+ "class_weights": null,
294
+ "confidence_penalty": 0,
295
+ "robust_lambda": 0,
296
+ "type": "softmax_cross_entropy",
297
+ "weight": 1.0
298
+ },
299
+ "preprocessing": {
300
+ "computed_fill_value": null,
301
+ "fill_value": null,
302
+ "height": null,
303
+ "in_memory": true,
304
+ "infer_image_dimensions": true,
305
+ "infer_image_max_height": 256,
306
+ "infer_image_max_width": 256,
307
+ "infer_image_num_channels": true,
308
+ "infer_image_num_classes": false,
309
+ "infer_image_sample_size": 100,
310
+ "missing_value_strategy": "bfill",
311
+ "num_channels": null,
312
+ "num_classes": null,
313
+ "num_processes": 1,
314
+ "requires_equal_dimensions": false,
315
+ "resize_method": "interpolate",
316
+ "standardize_image": null,
317
+ "width": null
318
+ }
319
+ },
320
+ "number": {
321
+ "decoder": {
322
+ "bias_initializer": "zeros",
323
+ "fc_activation": "relu",
324
+ "fc_bias_initializer": "zeros",
325
+ "fc_dropout": 0.0,
326
+ "fc_layers": null,
327
+ "fc_norm": null,
328
+ "fc_norm_params": null,
329
+ "fc_output_size": 256,
330
+ "fc_use_bias": true,
331
+ "fc_weights_initializer": "xavier_uniform",
332
+ "input_size": null,
333
+ "num_fc_layers": 0,
334
+ "type": "regressor",
335
+ "use_bias": true,
336
+ "weights_initializer": "xavier_uniform"
337
+ },
338
+ "encoder": {
339
+ "skip": false,
340
+ "type": "passthrough"
341
+ },
342
+ "loss": {
343
+ "type": "mean_squared_error",
344
+ "weight": 1.0
345
+ },
346
+ "preprocessing": {
347
+ "computed_fill_value": 0.0,
348
+ "computed_outlier_fill_value": 0.0,
349
+ "fill_value": 0.0,
350
+ "missing_value_strategy": "fill_with_const",
351
+ "normalization": "zscore",
352
+ "outlier_strategy": null,
353
+ "outlier_threshold": 3.0
354
+ }
355
+ },
356
+ "sequence": {
357
+ "decoder": {
358
+ "cell_type": "gru",
359
+ "fc_activation": "relu",
360
+ "fc_bias_initializer": "zeros",
361
+ "fc_dropout": 0.0,
362
+ "fc_layers": null,
363
+ "fc_norm": null,
364
+ "fc_norm_params": null,
365
+ "fc_output_size": 256,
366
+ "fc_use_bias": true,
367
+ "fc_weights_initializer": "xavier_uniform",
368
+ "input_size": 256,
369
+ "max_sequence_length": null,
370
+ "num_fc_layers": 0,
371
+ "num_layers": 1,
372
+ "reduce_input": "sum",
373
+ "type": "generator",
374
+ "vocab_size": null
375
+ },
376
+ "encoder": {
377
+ "dropout": 0.0,
378
+ "embedding_size": 256,
379
+ "embeddings_on_cpu": false,
380
+ "embeddings_trainable": true,
381
+ "max_sequence_length": null,
382
+ "pretrained_embeddings": null,
383
+ "reduce_output": "sum",
384
+ "representation": "dense",
385
+ "skip": false,
386
+ "type": "embed",
387
+ "vocab": null,
388
+ "weights_initializer": "uniform"
389
+ },
390
+ "loss": {
391
+ "class_similarities": null,
392
+ "class_similarities_temperature": 0,
393
+ "class_weights": null,
394
+ "confidence_penalty": 0,
395
+ "robust_lambda": 0,
396
+ "type": "sequence_softmax_cross_entropy",
397
+ "unique": false,
398
+ "weight": 1.0
399
+ },
400
+ "preprocessing": {
401
+ "cache_encoder_embeddings": false,
402
+ "computed_fill_value": "<UNK>",
403
+ "fill_value": "<UNK>",
404
+ "lowercase": false,
405
+ "max_sequence_length": 256,
406
+ "missing_value_strategy": "fill_with_const",
407
+ "most_common": 20000,
408
+ "ngram_size": 2,
409
+ "padding": "right",
410
+ "padding_symbol": "<PAD>",
411
+ "sequence_length": null,
412
+ "tokenizer": "space",
413
+ "unknown_symbol": "<UNK>",
414
+ "vocab_file": null
415
+ }
416
+ },
417
+ "set": {
418
+ "decoder": {
419
+ "bias_initializer": "zeros",
420
+ "fc_activation": "relu",
421
+ "fc_bias_initializer": "zeros",
422
+ "fc_dropout": 0.0,
423
+ "fc_layers": null,
424
+ "fc_norm": null,
425
+ "fc_norm_params": null,
426
+ "fc_output_size": 256,
427
+ "fc_use_bias": true,
428
+ "fc_weights_initializer": "xavier_uniform",
429
+ "input_size": null,
430
+ "num_classes": null,
431
+ "num_fc_layers": 0,
432
+ "type": "classifier",
433
+ "use_bias": true,
434
+ "weights_initializer": "xavier_uniform"
435
+ },
436
+ "encoder": {
437
+ "activation": "relu",
438
+ "bias_initializer": "zeros",
439
+ "dropout": 0.0,
440
+ "embedding_size": 50,
441
+ "embeddings_on_cpu": false,
442
+ "embeddings_trainable": true,
443
+ "fc_layers": null,
444
+ "norm": null,
445
+ "norm_params": null,
446
+ "num_fc_layers": 0,
447
+ "output_size": 10,
448
+ "pretrained_embeddings": null,
449
+ "representation": "dense",
450
+ "skip": false,
451
+ "type": "embed",
452
+ "use_bias": true,
453
+ "vocab": null,
454
+ "weights_initializer": "xavier_uniform"
455
+ },
456
+ "loss": {
457
+ "class_weights": null,
458
+ "type": "sigmoid_cross_entropy",
459
+ "weight": 1.0
460
+ },
461
+ "preprocessing": {
462
+ "computed_fill_value": "<UNK>",
463
+ "fill_value": "<UNK>",
464
+ "lowercase": false,
465
+ "missing_value_strategy": "fill_with_const",
466
+ "most_common": 10000,
467
+ "tokenizer": "space"
468
+ }
469
+ },
470
+ "text": {
471
+ "decoder": {
472
+ "cell_type": "gru",
473
+ "fc_activation": "relu",
474
+ "fc_bias_initializer": "zeros",
475
+ "fc_dropout": 0.0,
476
+ "fc_layers": null,
477
+ "fc_norm": null,
478
+ "fc_norm_params": null,
479
+ "fc_output_size": 256,
480
+ "fc_use_bias": true,
481
+ "fc_weights_initializer": "xavier_uniform",
482
+ "input_size": 256,
483
+ "max_sequence_length": null,
484
+ "num_fc_layers": 0,
485
+ "num_layers": 1,
486
+ "reduce_input": "sum",
487
+ "type": "generator",
488
+ "vocab_size": null
489
+ },
490
+ "encoder": {
491
+ "activation": "relu",
492
+ "bias_initializer": "zeros",
493
+ "conv_layers": null,
494
+ "dropout": 0.0,
495
+ "embedding_size": 256,
496
+ "embeddings_on_cpu": false,
497
+ "embeddings_trainable": true,
498
+ "fc_layers": null,
499
+ "filter_size": 3,
500
+ "max_sequence_length": null,
501
+ "norm": null,
502
+ "norm_params": null,
503
+ "num_conv_layers": null,
504
+ "num_fc_layers": null,
505
+ "num_filters": 256,
506
+ "output_size": 256,
507
+ "pool_function": "max",
508
+ "pool_size": null,
509
+ "pretrained_embeddings": null,
510
+ "reduce_output": "sum",
511
+ "representation": "dense",
512
+ "should_embed": true,
513
+ "skip": false,
514
+ "type": "parallel_cnn",
515
+ "use_bias": true,
516
+ "vocab": null,
517
+ "weights_initializer": "xavier_uniform"
518
+ },
519
+ "loss": {
520
+ "class_similarities": null,
521
+ "class_similarities_temperature": 0,
522
+ "class_weights": null,
523
+ "confidence_penalty": 0,
524
+ "robust_lambda": 0,
525
+ "type": "sequence_softmax_cross_entropy",
526
+ "unique": false,
527
+ "weight": 1.0
528
+ },
529
+ "preprocessing": {
530
+ "cache_encoder_embeddings": false,
531
+ "compute_idf": false,
532
+ "computed_fill_value": "<UNK>",
533
+ "fill_value": "<UNK>",
534
+ "lowercase": false,
535
+ "max_sequence_length": 256,
536
+ "missing_value_strategy": "fill_with_const",
537
+ "most_common": 20000,
538
+ "ngram_size": 2,
539
+ "padding": "right",
540
+ "padding_symbol": "<PAD>",
541
+ "pretrained_model_name_or_path": null,
542
+ "prompt": {
543
+ "retrieval": {
544
+ "index_name": null,
545
+ "k": 0,
546
+ "model_name": null,
547
+ "type": null
548
+ },
549
+ "task": null,
550
+ "template": null
551
+ },
552
+ "sequence_length": null,
553
+ "tokenizer": "space_punct",
554
+ "unknown_symbol": "<UNK>",
555
+ "vocab_file": null
556
+ }
557
+ },
558
+ "timeseries": {
559
+ "decoder": {
560
+ "activation": null,
561
+ "bias_initializer": "zeros",
562
+ "clip": null,
563
+ "fc_activation": "relu",
564
+ "fc_bias_initializer": "zeros",
565
+ "fc_dropout": 0.0,
566
+ "fc_layers": null,
567
+ "fc_norm": null,
568
+ "fc_norm_params": null,
569
+ "fc_output_size": 256,
570
+ "fc_use_bias": true,
571
+ "fc_weights_initializer": "xavier_uniform",
572
+ "input_size": null,
573
+ "multiplier": 1.0,
574
+ "num_fc_layers": 0,
575
+ "output_size": null,
576
+ "type": "projector",
577
+ "use_bias": true,
578
+ "weights_initializer": "xavier_uniform"
579
+ },
580
+ "encoder": {
581
+ "activation": "relu",
582
+ "bias_initializer": "zeros",
583
+ "conv_layers": null,
584
+ "dropout": 0.0,
585
+ "embedding_size": 256,
586
+ "embeddings_on_cpu": false,
587
+ "embeddings_trainable": true,
588
+ "fc_layers": null,
589
+ "filter_size": 3,
590
+ "max_sequence_length": null,
591
+ "norm": null,
592
+ "norm_params": null,
593
+ "num_conv_layers": null,
594
+ "num_fc_layers": null,
595
+ "num_filters": 256,
596
+ "output_size": 256,
597
+ "pool_function": "max",
598
+ "pool_size": null,
599
+ "pretrained_embeddings": null,
600
+ "reduce_output": "sum",
601
+ "representation": "dense",
602
+ "should_embed": true,
603
+ "skip": false,
604
+ "type": "parallel_cnn",
605
+ "use_bias": true,
606
+ "vocab": null,
607
+ "weights_initializer": "xavier_uniform"
608
+ },
609
+ "loss": {
610
+ "delta": 1.0,
611
+ "type": "huber",
612
+ "weight": 1.0
613
+ },
614
+ "preprocessing": {
615
+ "computed_fill_value": "",
616
+ "fill_value": "",
617
+ "missing_value_strategy": "fill_with_const",
618
+ "padding": "right",
619
+ "padding_value": 0.0,
620
+ "timeseries_length_limit": 256,
621
+ "tokenizer": "space",
622
+ "window_size": 0
623
+ }
624
+ },
625
+ "vector": {
626
+ "decoder": {
627
+ "activation": null,
628
+ "bias_initializer": "zeros",
629
+ "clip": null,
630
+ "fc_activation": "relu",
631
+ "fc_bias_initializer": "zeros",
632
+ "fc_dropout": 0.0,
633
+ "fc_layers": null,
634
+ "fc_norm": null,
635
+ "fc_norm_params": null,
636
+ "fc_output_size": 256,
637
+ "fc_use_bias": true,
638
+ "fc_weights_initializer": "xavier_uniform",
639
+ "input_size": null,
640
+ "multiplier": 1.0,
641
+ "num_fc_layers": 0,
642
+ "output_size": null,
643
+ "type": "projector",
644
+ "use_bias": true,
645
+ "weights_initializer": "xavier_uniform"
646
+ },
647
+ "encoder": {
648
+ "activation": "relu",
649
+ "bias_initializer": "zeros",
650
+ "dropout": 0.0,
651
+ "fc_layers": null,
652
+ "input_size": null,
653
+ "norm": null,
654
+ "norm_params": null,
655
+ "num_layers": 1,
656
+ "output_size": 256,
657
+ "skip": false,
658
+ "type": "dense",
659
+ "use_bias": true,
660
+ "weights_initializer": "xavier_uniform"
661
+ },
662
+ "loss": {
663
+ "type": "mean_squared_error",
664
+ "weight": 1.0
665
+ },
666
+ "preprocessing": {
667
+ "computed_fill_value": "",
668
+ "fill_value": "",
669
+ "missing_value_strategy": "fill_with_const",
670
+ "vector_size": null
671
+ }
672
+ }
673
+ },
674
+ "hyperopt": null,
675
+ "input_features": [
676
+ {
677
+ "active": true,
678
+ "column": "text",
679
+ "encoder": {
680
+ "activation": "relu",
681
+ "bias_initializer": "zeros",
682
+ "conv_layers": null,
683
+ "dropout": 0.0,
684
+ "embedding_size": 256,
685
+ "embeddings_on_cpu": false,
686
+ "embeddings_trainable": true,
687
+ "fc_layers": null,
688
+ "filter_size": 3,
689
+ "max_sequence_length": null,
690
+ "norm": null,
691
+ "norm_params": null,
692
+ "num_conv_layers": null,
693
+ "num_fc_layers": null,
694
+ "num_filters": 256,
695
+ "output_size": 256,
696
+ "pool_function": "max",
697
+ "pool_size": null,
698
+ "pretrained_embeddings": null,
699
+ "reduce_output": "sum",
700
+ "representation": "dense",
701
+ "should_embed": true,
702
+ "skip": false,
703
+ "type": "parallel_cnn",
704
+ "use_bias": true,
705
+ "vocab": null,
706
+ "weights_initializer": "xavier_uniform"
707
+ },
708
+ "name": "text",
709
+ "preprocessing": {
710
+ "cache_encoder_embeddings": false,
711
+ "compute_idf": false,
712
+ "computed_fill_value": "<UNK>",
713
+ "fill_value": "<UNK>",
714
+ "lowercase": true,
715
+ "max_sequence_length": 512,
716
+ "missing_value_strategy": "fill_with_const",
717
+ "most_common": 20000,
718
+ "ngram_size": 2,
719
+ "padding": "right",
720
+ "padding_symbol": "<PAD>",
721
+ "pretrained_model_name_or_path": null,
722
+ "prompt": {
723
+ "retrieval": {
724
+ "index_name": null,
725
+ "k": 0,
726
+ "model_name": null,
727
+ "type": null
728
+ },
729
+ "task": null,
730
+ "template": null
731
+ },
732
+ "sequence_length": null,
733
+ "tokenizer": "ngram",
734
+ "unknown_symbol": "<UNK>",
735
+ "vocab_file": null
736
+ },
737
+ "proc_column": "text_VQBTaW",
738
+ "tied": null,
739
+ "type": "text"
740
+ }
741
+ ],
742
+ "ludwig_version": "0.10.2.dev",
743
+ "model_type": "ecd",
744
+ "output_features": [
745
+ {
746
+ "active": true,
747
+ "calibration": false,
748
+ "column": "language",
749
+ "decoder": {
750
+ "bias_initializer": "zeros",
751
+ "fc_activation": "relu",
752
+ "fc_bias_initializer": "zeros",
753
+ "fc_dropout": 0.0,
754
+ "fc_layers": null,
755
+ "fc_norm": null,
756
+ "fc_norm_params": null,
757
+ "fc_output_size": 256,
758
+ "fc_use_bias": true,
759
+ "fc_weights_initializer": "xavier_uniform",
760
+ "input_size": null,
761
+ "num_classes": null,
762
+ "num_fc_layers": 0,
763
+ "type": "classifier",
764
+ "use_bias": true,
765
+ "weights_initializer": "xavier_uniform"
766
+ },
767
+ "default_validation_metric": "accuracy",
768
+ "dependencies": [],
769
+ "input_size": null,
770
+ "loss": {
771
+ "class_similarities": null,
772
+ "class_similarities_temperature": 0,
773
+ "class_weights": null,
774
+ "confidence_penalty": 0,
775
+ "robust_lambda": 0,
776
+ "type": "softmax_cross_entropy",
777
+ "weight": 1.0
778
+ },
779
+ "name": "language",
780
+ "num_classes": null,
781
+ "preprocessing": {
782
+ "cache_encoder_embeddings": false,
783
+ "computed_fill_value": "<UNK>",
784
+ "fill_value": "<UNK>",
785
+ "lowercase": false,
786
+ "missing_value_strategy": "drop_row",
787
+ "most_common": 10000
788
+ },
789
+ "proc_column": "language_YwJjWN",
790
+ "reduce_dependencies": "sum",
791
+ "reduce_input": "sum",
792
+ "top_k": 3,
793
+ "type": "category"
794
+ }
795
+ ],
796
+ "preprocessing": {
797
+ "global_max_sequence_length": null,
798
+ "oversample_minority": null,
799
+ "sample_ratio": 1.0,
800
+ "sample_size": null,
801
+ "split": {
802
+ "column": "language",
803
+ "probabilities": [
804
+ 0.85,
805
+ 0.1,
806
+ 0.05
807
+ ],
808
+ "type": "stratify"
809
+ },
810
+ "undersample_majority": null
811
+ },
812
+ "trainer": {
813
+ "batch_size": "auto",
814
+ "bucketing_field": null,
815
+ "checkpoints_per_epoch": 0,
816
+ "compile": false,
817
+ "early_stop": 0,
818
+ "effective_batch_size": "auto",
819
+ "enable_gradient_checkpointing": false,
820
+ "enable_profiling": false,
821
+ "epochs": 5,
822
+ "eval_batch_size": null,
823
+ "eval_steps": null,
824
+ "evaluate_training_set": false,
825
+ "gradient_accumulation_steps": "auto",
826
+ "gradient_clipping": {
827
+ "clipglobalnorm": 0.5,
828
+ "clipnorm": null,
829
+ "clipvalue": null
830
+ },
831
+ "increase_batch_size_eval_metric": "loss",
832
+ "increase_batch_size_eval_split": "training",
833
+ "increase_batch_size_on_plateau": 0,
834
+ "increase_batch_size_on_plateau_patience": 5,
835
+ "increase_batch_size_on_plateau_rate": 2.0,
836
+ "learning_rate": 0.001,
837
+ "learning_rate_scaling": "linear",
838
+ "learning_rate_scheduler": {
839
+ "decay": null,
840
+ "decay_rate": 0.96,
841
+ "decay_steps": 10000,
842
+ "eta_min": 0,
843
+ "reduce_eval_metric": "loss",
844
+ "reduce_eval_split": "training",
845
+ "reduce_on_plateau": 0,
846
+ "reduce_on_plateau_patience": 10,
847
+ "reduce_on_plateau_rate": 0.1,
848
+ "staircase": false,
849
+ "t_0": null,
850
+ "t_mult": 1,
851
+ "warmup_evaluations": 0,
852
+ "warmup_fraction": 0.0
853
+ },
854
+ "max_batch_size": 1099511627776,
855
+ "optimizer": {
856
+ "amsgrad": false,
857
+ "betas": [
858
+ 0.9,
859
+ 0.999
860
+ ],
861
+ "eps": 1e-08,
862
+ "type": "adam",
863
+ "weight_decay": 0.0
864
+ },
865
+ "profiler": {
866
+ "active": 3,
867
+ "repeat": 5,
868
+ "skip_first": 0,
869
+ "wait": 1,
870
+ "warmup": 1
871
+ },
872
+ "regularization_lambda": 0.0,
873
+ "regularization_type": "l2",
874
+ "should_shuffle": true,
875
+ "skip_all_evaluation": false,
876
+ "steps_per_checkpoint": 0,
877
+ "train_steps": null,
878
+ "use_mixed_precision": false,
879
+ "validation_field": "language",
880
+ "validation_metric": "accuracy"
881
+ }
882
+ },
883
+ "data_format": "json",
884
+ "dataset": "/data/data.json",
885
+ "ludwig_version": "0.10.2.dev",
886
+ "random_seed": 42,
887
+ "torch_version": "2.0.0+cpu"
888
+ }
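The config above is what ludwig experiment serialized for this run; the uploaded model/ directory can be reloaded for inference through Ludwig's Python API. A minimal sketch, assuming a local clone with the LFS weights fetched and a compatible Ludwig (~0.10.x) plus pandas installed; the feature names "text" and "language" are taken from the config, and the prediction column name follows Ludwig's <feature>_predictions convention:

import pandas as pd
from ludwig.api import LudwigModel

# Load the directory added in this commit (model_hyperparameters.json,
# model_weights, training_set_metadata.json, ...). The path "model" is an
# assumption based on this repo's file layout.
model = LudwigModel.load("model")

# The config declares one text input feature ("text") and one category
# output feature ("language").
samples = pd.DataFrame({"text": ["bonjour tout le monde", "hello world"]})
predictions, _ = model.predict(dataset=samples)
print(predictions["language_predictions"])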
model/llm_eval_examples/0.csv ADDED
@@ -0,0 +1 @@
+ inputs,targets,outputs
model/llm_eval_examples/2.csv ADDED
@@ -0,0 +1 @@
+ inputs,targets,outputs
model/llm_eval_examples/4.csv ADDED
@@ -0,0 +1 @@
+ inputs,targets,outputs
model/llm_eval_examples/6.csv ADDED
@@ -0,0 +1 @@
+ inputs,targets,outputs
model/llm_eval_examples/8.csv ADDED
@@ -0,0 +1 @@
+ inputs,targets,outputs
model/model_hyperparameters.json ADDED
The diff for this file is too large to render. See raw diff
 
model/model_weights ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dccb2f0416fadce9c5e4e4d2cc528db6064ec4d6296c67f95ed137975db95c66
+ size 17302302
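The three lines above are only a Git LFS pointer; the 17,302,302-byte weights file itself lives in LFS storage. A minimal sketch (an illustration, not part of the repo) for checking whether the real file was fetched, using the size recorded in the pointer:

import os

# Expected size taken from the LFS pointer above.
EXPECTED_SIZE = 17302302

actual = os.path.getsize("model/model_weights")
if actual == EXPECTED_SIZE:
    print("model/model_weights fetched from LFS ({} bytes)".format(actual))
else:
    # A pointer file is only a few hundred bytes; "git lfs pull" fetches the weights.
    print("unexpected size {} (LFS weights not pulled?)".format(actual))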
model/training_progress.json ADDED
@@ -0,0 +1,1461 @@
1
+ {
2
+ "batch_size": 64,
3
+ "best_eval_metric_checkpoint_number": 6,
4
+ "best_eval_metric_epoch": 4,
5
+ "best_eval_metric_steps": 372,
6
+ "best_eval_metric_value": 0.9799168109893799,
7
+ "best_eval_test_metrics": {
8
+ "combined": {
9
+ "loss": 0.07570453733205795
10
+ },
11
+ "language": {
12
+ "accuracy": 0.9817298650741577,
13
+ "accuracy_micro": 0.9799426794052124,
14
+ "loss": 0.07570453733205795,
15
+ "roc_auc": 0.999114990234375
16
+ }
17
+ },
18
+ "best_eval_train_metrics": {
19
+ "combined": {
20
+ "loss": 0.03008267655968666
21
+ },
22
+ "language": {
23
+ "accuracy": 0.9913396835327148,
24
+ "accuracy_micro": 0.9910488128662109,
25
+ "loss": 0.03008267655968666,
26
+ "roc_auc": 0.9997660517692566
27
+ }
28
+ },
29
+ "best_eval_validation_metrics": {
30
+ "combined": {
31
+ "loss": 0.06920339167118073
32
+ },
33
+ "language": {
34
+ "accuracy": 0.9799168109893799,
35
+ "accuracy_micro": 0.9784792065620422,
36
+ "loss": 0.06920339167118073,
37
+ "roc_auc": 0.998852550983429
38
+ }
39
+ },
40
+ "best_increase_batch_size_eval_metric": Infinity,
41
+ "checkpoint_number": 10,
42
+ "checkpoint_to_epoch": {
43
+ "1": 1,
44
+ "10": 5,
45
+ "2": 1,
46
+ "3": 2,
47
+ "4": 2,
48
+ "5": 3,
49
+ "6": 3,
50
+ "7": 4,
51
+ "8": 4,
52
+ "9": 5
53
+ },
54
+ "checkpoint_to_step": {
55
+ "1": 93,
56
+ "10": 465,
57
+ "2": 93,
58
+ "3": 186,
59
+ "4": 186,
60
+ "5": 279,
61
+ "6": 279,
62
+ "7": 372,
63
+ "8": 372,
64
+ "9": 465
65
+ },
66
+ "cumulative_checkpoint_token_usage": {
67
+ "1": 2356558,
68
+ "10": 11782790,
69
+ "2": 2356558,
70
+ "3": 4713116,
71
+ "4": 4713116,
72
+ "5": 7069674,
73
+ "6": 7069674,
74
+ "7": 9426232,
75
+ "8": 9426232,
76
+ "9": 11782790
77
+ },
78
+ "cumulative_step_token_usage": {
79
+ "0": 25472,
80
+ "1": 50944,
81
+ "10": 280192,
82
+ "100": 2560334,
83
+ "101": 2585806,
84
+ "102": 2611278,
85
+ "103": 2636750,
86
+ "104": 2662222,
87
+ "105": 2687694,
88
+ "106": 2713166,
89
+ "107": 2738638,
90
+ "108": 2764110,
91
+ "109": 2789582,
92
+ "11": 305664,
93
+ "110": 2815054,
94
+ "111": 2840526,
95
+ "112": 2865998,
96
+ "113": 2891470,
97
+ "114": 2916942,
98
+ "115": 2942414,
99
+ "116": 2967886,
100
+ "117": 2993358,
101
+ "118": 3018830,
102
+ "119": 3044302,
103
+ "12": 331136,
104
+ "120": 3069774,
105
+ "121": 3095246,
106
+ "122": 3120718,
107
+ "123": 3146190,
108
+ "124": 3171662,
109
+ "125": 3197134,
110
+ "126": 3222606,
111
+ "127": 3248078,
112
+ "128": 3273550,
113
+ "129": 3299022,
114
+ "13": 356608,
115
+ "130": 3324494,
116
+ "131": 3349966,
117
+ "132": 3375438,
118
+ "133": 3400910,
119
+ "134": 3426382,
120
+ "135": 3451854,
121
+ "136": 3477326,
122
+ "137": 3502798,
123
+ "138": 3528270,
124
+ "139": 3553742,
125
+ "14": 382080,
126
+ "140": 3579214,
127
+ "141": 3604686,
128
+ "142": 3630158,
129
+ "143": 3655630,
130
+ "144": 3681102,
131
+ "145": 3706574,
132
+ "146": 3732046,
133
+ "147": 3757518,
134
+ "148": 3782990,
135
+ "149": 3808462,
136
+ "15": 407552,
137
+ "150": 3833934,
138
+ "151": 3859406,
139
+ "152": 3884878,
140
+ "153": 3910350,
141
+ "154": 3935822,
142
+ "155": 3961294,
143
+ "156": 3986766,
144
+ "157": 4012238,
145
+ "158": 4037710,
146
+ "159": 4063182,
147
+ "16": 433024,
148
+ "160": 4088654,
149
+ "161": 4114126,
150
+ "162": 4139598,
151
+ "163": 4165070,
152
+ "164": 4190542,
153
+ "165": 4216014,
154
+ "166": 4241486,
155
+ "167": 4266958,
156
+ "168": 4292430,
157
+ "169": 4317902,
158
+ "17": 458496,
159
+ "170": 4343374,
160
+ "171": 4368846,
161
+ "172": 4394318,
162
+ "173": 4419790,
163
+ "174": 4445262,
164
+ "175": 4470734,
165
+ "176": 4496206,
166
+ "177": 4521678,
167
+ "178": 4547150,
168
+ "179": 4572622,
169
+ "18": 483968,
170
+ "180": 4598094,
171
+ "181": 4623566,
172
+ "182": 4649038,
173
+ "183": 4674510,
174
+ "184": 4699982,
175
+ "185": 4713116,
176
+ "186": 4738588,
177
+ "187": 4764060,
178
+ "188": 4789532,
179
+ "189": 4815004,
180
+ "19": 509440,
181
+ "190": 4840476,
182
+ "191": 4865948,
183
+ "192": 4891420,
184
+ "193": 4916892,
185
+ "194": 4942364,
186
+ "195": 4967836,
187
+ "196": 4993308,
188
+ "197": 5018780,
189
+ "198": 5044252,
190
+ "199": 5069724,
191
+ "2": 76416,
192
+ "20": 534912,
193
+ "200": 5095196,
194
+ "201": 5120668,
195
+ "202": 5146140,
196
+ "203": 5171612,
197
+ "204": 5197084,
198
+ "205": 5222556,
199
+ "206": 5248028,
200
+ "207": 5273500,
201
+ "208": 5298972,
202
+ "209": 5324444,
203
+ "21": 560384,
204
+ "210": 5349916,
205
+ "211": 5375388,
206
+ "212": 5400860,
207
+ "213": 5426332,
208
+ "214": 5451804,
209
+ "215": 5477276,
210
+ "216": 5502748,
211
+ "217": 5528220,
212
+ "218": 5553692,
213
+ "219": 5579164,
214
+ "22": 585856,
215
+ "220": 5604636,
216
+ "221": 5630108,
217
+ "222": 5655580,
218
+ "223": 5681052,
219
+ "224": 5706524,
220
+ "225": 5731996,
221
+ "226": 5757468,
222
+ "227": 5782940,
223
+ "228": 5808412,
224
+ "229": 5833884,
225
+ "23": 611328,
226
+ "230": 5859356,
227
+ "231": 5884828,
228
+ "232": 5910300,
229
+ "233": 5935772,
230
+ "234": 5961244,
231
+ "235": 5986716,
232
+ "236": 6012188,
233
+ "237": 6037660,
234
+ "238": 6063132,
235
+ "239": 6088604,
236
+ "24": 636800,
237
+ "240": 6114076,
238
+ "241": 6139548,
239
+ "242": 6165020,
240
+ "243": 6190492,
241
+ "244": 6215964,
242
+ "245": 6241436,
243
+ "246": 6266908,
244
+ "247": 6292380,
245
+ "248": 6317852,
246
+ "249": 6343324,
247
+ "25": 662272,
248
+ "250": 6368796,
249
+ "251": 6394268,
250
+ "252": 6419740,
251
+ "253": 6445212,
252
+ "254": 6470684,
253
+ "255": 6496156,
254
+ "256": 6521628,
255
+ "257": 6547100,
256
+ "258": 6572572,
257
+ "259": 6598044,
258
+ "26": 687744,
259
+ "260": 6623516,
260
+ "261": 6648988,
261
+ "262": 6674460,
262
+ "263": 6699932,
263
+ "264": 6725404,
264
+ "265": 6750876,
265
+ "266": 6776348,
266
+ "267": 6801820,
267
+ "268": 6827292,
268
+ "269": 6852764,
269
+ "27": 713216,
270
+ "270": 6878236,
271
+ "271": 6903708,
272
+ "272": 6929180,
273
+ "273": 6954652,
274
+ "274": 6980124,
275
+ "275": 7005596,
276
+ "276": 7031068,
277
+ "277": 7056540,
278
+ "278": 7069674,
279
+ "279": 7095146,
280
+ "28": 738688,
281
+ "280": 7120618,
282
+ "281": 7146090,
283
+ "282": 7171562,
284
+ "283": 7197034,
285
+ "284": 7222506,
286
+ "285": 7247978,
287
+ "286": 7273450,
288
+ "287": 7298922,
289
+ "288": 7324394,
290
+ "289": 7349866,
291
+ "29": 764160,
292
+ "290": 7375338,
293
+ "291": 7400810,
294
+ "292": 7426282,
295
+ "293": 7451754,
296
+ "294": 7477226,
297
+ "295": 7502698,
298
+ "296": 7528170,
299
+ "297": 7553642,
300
+ "298": 7579114,
301
+ "299": 7604586,
302
+ "3": 101888,
303
+ "30": 789632,
304
+ "300": 7630058,
305
+ "301": 7655530,
306
+ "302": 7681002,
307
+ "303": 7706474,
308
+ "304": 7731946,
309
+ "305": 7757418,
310
+ "306": 7782890,
311
+ "307": 7808362,
312
+ "308": 7833834,
313
+ "309": 7859306,
314
+ "31": 815104,
315
+ "310": 7884778,
316
+ "311": 7910250,
317
+ "312": 7935722,
318
+ "313": 7961194,
319
+ "314": 7986666,
320
+ "315": 8012138,
321
+ "316": 8037610,
322
+ "317": 8063082,
323
+ "318": 8088554,
324
+ "319": 8114026,
325
+ "32": 840576,
326
+ "320": 8139498,
327
+ "321": 8164970,
328
+ "322": 8190442,
329
+ "323": 8215914,
330
+ "324": 8241386,
331
+ "325": 8266858,
332
+ "326": 8292330,
333
+ "327": 8317802,
334
+ "328": 8343274,
335
+ "329": 8368746,
336
+ "33": 866048,
337
+ "330": 8394218,
338
+ "331": 8419690,
339
+ "332": 8445162,
340
+ "333": 8470634,
341
+ "334": 8496106,
342
+ "335": 8521578,
343
+ "336": 8547050,
344
+ "337": 8572522,
345
+ "338": 8597994,
346
+ "339": 8623466,
347
+ "34": 891520,
348
+ "340": 8648938,
349
+ "341": 8674410,
350
+ "342": 8699882,
351
+ "343": 8725354,
352
+ "344": 8750826,
353
+ "345": 8776298,
354
+ "346": 8801770,
355
+ "347": 8827242,
356
+ "348": 8852714,
357
+ "349": 8878186,
358
+ "35": 916992,
359
+ "350": 8903658,
360
+ "351": 8929130,
361
+ "352": 8954602,
362
+ "353": 8980074,
363
+ "354": 9005546,
364
+ "355": 9031018,
365
+ "356": 9056490,
366
+ "357": 9081962,
367
+ "358": 9107434,
368
+ "359": 9132906,
369
+ "36": 942464,
370
+ "360": 9158378,
371
+ "361": 9183850,
372
+ "362": 9209322,
373
+ "363": 9234794,
374
+ "364": 9260266,
375
+ "365": 9285738,
376
+ "366": 9311210,
377
+ "367": 9336682,
378
+ "368": 9362154,
379
+ "369": 9387626,
380
+ "37": 967936,
381
+ "370": 9413098,
382
+ "371": 9426232,
383
+ "372": 9451704,
384
+ "373": 9477176,
385
+ "374": 9502648,
386
+ "375": 9528120,
387
+ "376": 9553592,
388
+ "377": 9579064,
389
+ "378": 9604536,
390
+ "379": 9630008,
391
+ "38": 993408,
392
+ "380": 9655480,
393
+ "381": 9680952,
394
+ "382": 9706424,
395
+ "383": 9731896,
396
+ "384": 9757368,
397
+ "385": 9782840,
398
+ "386": 9808312,
399
+ "387": 9833784,
400
+ "388": 9859256,
401
+ "389": 9884728,
402
+ "39": 1018880,
403
+ "390": 9910200,
404
+ "391": 9935672,
405
+ "392": 9961144,
406
+ "393": 9986616,
407
+ "394": 10012088,
408
+ "395": 10037560,
409
+ "396": 10063032,
410
+ "397": 10088504,
411
+ "398": 10113976,
412
+ "399": 10139448,
413
+ "4": 127360,
414
+ "40": 1044352,
415
+ "400": 10164920,
416
+ "401": 10190392,
417
+ "402": 10215864,
418
+ "403": 10241336,
419
+ "404": 10266808,
420
+ "405": 10292280,
421
+ "406": 10317752,
422
+ "407": 10343224,
423
+ "408": 10368696,
424
+ "409": 10394168,
425
+ "41": 1069824,
426
+ "410": 10419640,
427
+ "411": 10445112,
428
+ "412": 10470584,
429
+ "413": 10496056,
430
+ "414": 10521528,
431
+ "415": 10547000,
432
+ "416": 10572472,
433
+ "417": 10597944,
434
+ "418": 10623416,
435
+ "419": 10648888,
436
+ "42": 1095296,
437
+ "420": 10674360,
438
+ "421": 10699832,
439
+ "422": 10725304,
440
+ "423": 10750776,
441
+ "424": 10776248,
442
+ "425": 10801720,
443
+ "426": 10827192,
444
+ "427": 10852664,
445
+ "428": 10878136,
446
+ "429": 10903608,
447
+ "43": 1120768,
448
+ "430": 10929080,
449
+ "431": 10954552,
450
+ "432": 10980024,
451
+ "433": 11005496,
452
+ "434": 11030968,
453
+ "435": 11056440,
454
+ "436": 11081912,
455
+ "437": 11107384,
456
+ "438": 11132856,
457
+ "439": 11158328,
458
+ "44": 1146240,
459
+ "440": 11183800,
460
+ "441": 11209272,
461
+ "442": 11234744,
462
+ "443": 11260216,
463
+ "444": 11285688,
464
+ "445": 11311160,
465
+ "446": 11336632,
466
+ "447": 11362104,
467
+ "448": 11387576,
468
+ "449": 11413048,
469
+ "45": 1171712,
470
+ "450": 11438520,
471
+ "451": 11463992,
472
+ "452": 11489464,
473
+ "453": 11514936,
474
+ "454": 11540408,
475
+ "455": 11565880,
476
+ "456": 11591352,
477
+ "457": 11616824,
478
+ "458": 11642296,
479
+ "459": 11667768,
480
+ "46": 1197184,
481
+ "460": 11693240,
482
+ "461": 11718712,
483
+ "462": 11744184,
484
+ "463": 11769656,
485
+ "464": 11782790,
486
+ "47": 1222656,
487
+ "48": 1248128,
488
+ "49": 1273600,
489
+ "5": 152832,
490
+ "50": 1299072,
491
+ "51": 1324544,
492
+ "52": 1350016,
493
+ "53": 1375488,
494
+ "54": 1400960,
495
+ "55": 1426432,
496
+ "56": 1451904,
497
+ "57": 1477376,
498
+ "58": 1502848,
499
+ "59": 1528320,
500
+ "6": 178304,
501
+ "60": 1553792,
502
+ "61": 1579264,
503
+ "62": 1604736,
504
+ "63": 1630208,
505
+ "64": 1655680,
506
+ "65": 1681152,
507
+ "66": 1706624,
508
+ "67": 1732096,
509
+ "68": 1757568,
510
+ "69": 1783040,
511
+ "7": 203776,
512
+ "70": 1808512,
513
+ "71": 1833984,
514
+ "72": 1859456,
515
+ "73": 1884928,
516
+ "74": 1910400,
517
+ "75": 1935872,
518
+ "76": 1961344,
519
+ "77": 1986816,
520
+ "78": 2012288,
521
+ "79": 2037760,
522
+ "8": 229248,
523
+ "80": 2063232,
524
+ "81": 2088704,
525
+ "82": 2114176,
526
+ "83": 2139648,
527
+ "84": 2165120,
528
+ "85": 2190592,
529
+ "86": 2216064,
530
+ "87": 2241536,
531
+ "88": 2267008,
532
+ "89": 2292480,
533
+ "9": 254720,
534
+ "90": 2317952,
535
+ "91": 2343424,
536
+ "92": 2356558,
537
+ "93": 2382030,
538
+ "94": 2407502,
539
+ "95": 2432974,
540
+ "96": 2458446,
541
+ "97": 2483918,
542
+ "98": 2509390,
543
+ "99": 2534862
544
+ },
545
+ "epoch": 5,
546
+ "incremental_checkpoint_token_usage": {
547
+ "1": 2356558,
548
+ "10": 0,
549
+ "2": 0,
550
+ "3": 2356558,
551
+ "4": 0,
552
+ "5": 2356558,
553
+ "6": 0,
554
+ "7": 2356558,
555
+ "8": 0,
556
+ "9": 2356558
557
+ },
558
+ "incremental_step_token_usage": {
559
+ "0": 25472,
560
+ "1": 25472,
561
+ "10": 25472,
562
+ "100": 25472,
563
+ "101": 25472,
564
+ "102": 25472,
565
+ "103": 25472,
566
+ "104": 25472,
567
+ "105": 25472,
568
+ "106": 25472,
569
+ "107": 25472,
570
+ "108": 25472,
571
+ "109": 25472,
572
+ "11": 25472,
573
+ "110": 25472,
574
+ "111": 25472,
575
+ "112": 25472,
576
+ "113": 25472,
577
+ "114": 25472,
578
+ "115": 25472,
579
+ "116": 25472,
580
+ "117": 25472,
581
+ "118": 25472,
582
+ "119": 25472,
583
+ "12": 25472,
584
+ "120": 25472,
585
+ "121": 25472,
586
+ "122": 25472,
587
+ "123": 25472,
588
+ "124": 25472,
589
+ "125": 25472,
590
+ "126": 25472,
591
+ "127": 25472,
592
+ "128": 25472,
593
+ "129": 25472,
594
+ "13": 25472,
595
+ "130": 25472,
596
+ "131": 25472,
597
+ "132": 25472,
598
+ "133": 25472,
599
+ "134": 25472,
600
+ "135": 25472,
601
+ "136": 25472,
602
+ "137": 25472,
603
+ "138": 25472,
604
+ "139": 25472,
605
+ "14": 25472,
606
+ "140": 25472,
607
+ "141": 25472,
608
+ "142": 25472,
609
+ "143": 25472,
610
+ "144": 25472,
611
+ "145": 25472,
612
+ "146": 25472,
613
+ "147": 25472,
614
+ "148": 25472,
615
+ "149": 25472,
616
+ "15": 25472,
617
+ "150": 25472,
618
+ "151": 25472,
619
+ "152": 25472,
620
+ "153": 25472,
621
+ "154": 25472,
622
+ "155": 25472,
623
+ "156": 25472,
624
+ "157": 25472,
625
+ "158": 25472,
626
+ "159": 25472,
627
+ "16": 25472,
628
+ "160": 25472,
629
+ "161": 25472,
630
+ "162": 25472,
631
+ "163": 25472,
632
+ "164": 25472,
633
+ "165": 25472,
634
+ "166": 25472,
635
+ "167": 25472,
636
+ "168": 25472,
637
+ "169": 25472,
638
+ "17": 25472,
639
+ "170": 25472,
640
+ "171": 25472,
641
+ "172": 25472,
642
+ "173": 25472,
643
+ "174": 25472,
644
+ "175": 25472,
645
+ "176": 25472,
646
+ "177": 25472,
647
+ "178": 25472,
648
+ "179": 25472,
649
+ "18": 25472,
650
+ "180": 25472,
651
+ "181": 25472,
652
+ "182": 25472,
653
+ "183": 25472,
654
+ "184": 25472,
655
+ "185": 13134,
656
+ "186": 25472,
657
+ "187": 25472,
658
+ "188": 25472,
659
+ "189": 25472,
660
+ "19": 25472,
661
+ "190": 25472,
662
+ "191": 25472,
663
+ "192": 25472,
664
+ "193": 25472,
665
+ "194": 25472,
666
+ "195": 25472,
667
+ "196": 25472,
668
+ "197": 25472,
669
+ "198": 25472,
670
+ "199": 25472,
671
+ "2": 25472,
672
+ "20": 25472,
673
+ "200": 25472,
674
+ "201": 25472,
675
+ "202": 25472,
676
+ "203": 25472,
677
+ "204": 25472,
678
+ "205": 25472,
679
+ "206": 25472,
680
+ "207": 25472,
681
+ "208": 25472,
682
+ "209": 25472,
683
+ "21": 25472,
684
+ "210": 25472,
685
+ "211": 25472,
686
+ "212": 25472,
687
+ "213": 25472,
688
+ "214": 25472,
689
+ "215": 25472,
690
+ "216": 25472,
691
+ "217": 25472,
692
+ "218": 25472,
693
+ "219": 25472,
694
+ "22": 25472,
695
+ "220": 25472,
696
+ "221": 25472,
697
+ "222": 25472,
698
+ "223": 25472,
699
+ "224": 25472,
700
+ "225": 25472,
701
+ "226": 25472,
702
+ "227": 25472,
703
+ "228": 25472,
704
+ "229": 25472,
705
+ "23": 25472,
706
+ "230": 25472,
707
+ "231": 25472,
708
+ "232": 25472,
709
+ "233": 25472,
710
+ "234": 25472,
711
+ "235": 25472,
712
+ "236": 25472,
713
+ "237": 25472,
714
+ "238": 25472,
715
+ "239": 25472,
716
+ "24": 25472,
717
+ "240": 25472,
718
+ "241": 25472,
719
+ "242": 25472,
720
+ "243": 25472,
721
+ "244": 25472,
722
+ "245": 25472,
723
+ "246": 25472,
724
+ "247": 25472,
725
+ "248": 25472,
726
+ "249": 25472,
727
+ "25": 25472,
728
+ "250": 25472,
729
+ "251": 25472,
730
+ "252": 25472,
731
+ "253": 25472,
732
+ "254": 25472,
733
+ "255": 25472,
734
+ "256": 25472,
735
+ "257": 25472,
736
+ "258": 25472,
737
+ "259": 25472,
738
+ "26": 25472,
739
+ "260": 25472,
740
+ "261": 25472,
741
+ "262": 25472,
742
+ "263": 25472,
743
+ "264": 25472,
744
+ "265": 25472,
745
+ "266": 25472,
746
+ "267": 25472,
747
+ "268": 25472,
748
+ "269": 25472,
749
+ "27": 25472,
750
+ "270": 25472,
751
+ "271": 25472,
752
+ "272": 25472,
753
+ "273": 25472,
754
+ "274": 25472,
755
+ "275": 25472,
756
+ "276": 25472,
757
+ "277": 25472,
758
+ "278": 13134,
759
+ "279": 25472,
760
+ "28": 25472,
761
+ "280": 25472,
762
+ "281": 25472,
763
+ "282": 25472,
764
+ "283": 25472,
765
+ "284": 25472,
766
+ "285": 25472,
767
+ "286": 25472,
768
+ "287": 25472,
769
+ "288": 25472,
770
+ "289": 25472,
771
+ "29": 25472,
772
+ "290": 25472,
773
+ "291": 25472,
774
+ "292": 25472,
775
+ "293": 25472,
776
+ "294": 25472,
777
+ "295": 25472,
778
+ "296": 25472,
779
+ "297": 25472,
780
+ "298": 25472,
781
+ "299": 25472,
782
+ "3": 25472,
783
+ "30": 25472,
784
+ "300": 25472,
785
+ "301": 25472,
786
+ "302": 25472,
787
+ "303": 25472,
788
+ "304": 25472,
789
+ "305": 25472,
790
+ "306": 25472,
791
+ "307": 25472,
792
+ "308": 25472,
793
+ "309": 25472,
794
+ "31": 25472,
795
+ "310": 25472,
796
+ "311": 25472,
797
+ "312": 25472,
798
+ "313": 25472,
799
+ "314": 25472,
800
+ "315": 25472,
801
+ "316": 25472,
802
+ "317": 25472,
803
+ "318": 25472,
804
+ "319": 25472,
805
+ "32": 25472,
806
+ "320": 25472,
807
+ "321": 25472,
808
+ "322": 25472,
809
+ "323": 25472,
810
+ "324": 25472,
811
+ "325": 25472,
812
+ "326": 25472,
813
+ "327": 25472,
814
+ "328": 25472,
815
+ "329": 25472,
816
+ "33": 25472,
817
+ "330": 25472,
818
+ "331": 25472,
819
+ "332": 25472,
820
+ "333": 25472,
821
+ "334": 25472,
822
+ "335": 25472,
823
+ "336": 25472,
824
+ "337": 25472,
825
+ "338": 25472,
826
+ "339": 25472,
827
+ "34": 25472,
828
+ "340": 25472,
829
+ "341": 25472,
830
+ "342": 25472,
831
+ "343": 25472,
832
+ "344": 25472,
833
+ "345": 25472,
834
+ "346": 25472,
835
+ "347": 25472,
836
+ "348": 25472,
837
+ "349": 25472,
838
+ "35": 25472,
839
+ "350": 25472,
840
+ "351": 25472,
841
+ "352": 25472,
842
+ "353": 25472,
843
+ "354": 25472,
844
+ "355": 25472,
845
+ "356": 25472,
846
+ "357": 25472,
847
+ "358": 25472,
848
+ "359": 25472,
849
+ "36": 25472,
850
+ "360": 25472,
851
+ "361": 25472,
852
+ "362": 25472,
853
+ "363": 25472,
854
+ "364": 25472,
855
+ "365": 25472,
856
+ "366": 25472,
857
+ "367": 25472,
858
+ "368": 25472,
859
+ "369": 25472,
860
+ "37": 25472,
861
+ "370": 25472,
862
+ "371": 13134,
863
+ "372": 25472,
864
+ "373": 25472,
865
+ "374": 25472,
866
+ "375": 25472,
867
+ "376": 25472,
868
+ "377": 25472,
869
+ "378": 25472,
870
+ "379": 25472,
871
+ "38": 25472,
872
+ "380": 25472,
873
+ "381": 25472,
874
+ "382": 25472,
875
+ "383": 25472,
876
+ "384": 25472,
877
+ "385": 25472,
878
+ "386": 25472,
879
+ "387": 25472,
880
+ "388": 25472,
881
+ "389": 25472,
882
+ "39": 25472,
883
+ "390": 25472,
884
+ "391": 25472,
885
+ "392": 25472,
886
+ "393": 25472,
887
+ "394": 25472,
888
+ "395": 25472,
889
+ "396": 25472,
890
+ "397": 25472,
891
+ "398": 25472,
892
+ "399": 25472,
893
+ "4": 25472,
894
+ "40": 25472,
895
+ "400": 25472,
896
+ "401": 25472,
897
+ "402": 25472,
898
+ "403": 25472,
899
+ "404": 25472,
900
+ "405": 25472,
901
+ "406": 25472,
902
+ "407": 25472,
903
+ "408": 25472,
904
+ "409": 25472,
905
+ "41": 25472,
906
+ "410": 25472,
907
+ "411": 25472,
908
+ "412": 25472,
909
+ "413": 25472,
910
+ "414": 25472,
911
+ "415": 25472,
912
+ "416": 25472,
913
+ "417": 25472,
914
+ "418": 25472,
915
+ "419": 25472,
916
+ "42": 25472,
917
+ "420": 25472,
918
+ "421": 25472,
919
+ "422": 25472,
920
+ "423": 25472,
921
+ "424": 25472,
922
+ "425": 25472,
923
+ "426": 25472,
924
+ "427": 25472,
925
+ "428": 25472,
926
+ "429": 25472,
927
+ "43": 25472,
928
+ "430": 25472,
929
+ "431": 25472,
930
+ "432": 25472,
931
+ "433": 25472,
932
+ "434": 25472,
933
+ "435": 25472,
934
+ "436": 25472,
935
+ "437": 25472,
936
+ "438": 25472,
937
+ "439": 25472,
938
+ "44": 25472,
939
+ "440": 25472,
940
+ "441": 25472,
941
+ "442": 25472,
942
+ "443": 25472,
943
+ "444": 25472,
944
+ "445": 25472,
945
+ "446": 25472,
946
+ "447": 25472,
947
+ "448": 25472,
948
+ "449": 25472,
949
+ "45": 25472,
950
+ "450": 25472,
951
+ "451": 25472,
952
+ "452": 25472,
953
+ "453": 25472,
954
+ "454": 25472,
955
+ "455": 25472,
956
+ "456": 25472,
957
+ "457": 25472,
958
+ "458": 25472,
959
+ "459": 25472,
960
+ "46": 25472,
961
+ "460": 25472,
962
+ "461": 25472,
963
+ "462": 25472,
964
+ "463": 25472,
965
+ "464": 13134,
966
+ "47": 25472,
967
+ "48": 25472,
968
+ "49": 25472,
969
+ "5": 25472,
970
+ "50": 25472,
971
+ "51": 25472,
972
+ "52": 25472,
973
+ "53": 25472,
974
+ "54": 25472,
975
+ "55": 25472,
976
+ "56": 25472,
977
+ "57": 25472,
978
+ "58": 25472,
979
+ "59": 25472,
980
+ "6": 25472,
981
+ "60": 25472,
982
+ "61": 25472,
983
+ "62": 25472,
984
+ "63": 25472,
985
+ "64": 25472,
986
+ "65": 25472,
987
+ "66": 25472,
988
+ "67": 25472,
989
+ "68": 25472,
990
+ "69": 25472,
991
+ "7": 25472,
992
+ "70": 25472,
993
+ "71": 25472,
994
+ "72": 25472,
995
+ "73": 25472,
996
+ "74": 25472,
997
+ "75": 25472,
998
+ "76": 25472,
999
+ "77": 25472,
1000
+ "78": 25472,
1001
+ "79": 25472,
1002
+ "8": 25472,
1003
+ "80": 25472,
1004
+ "81": 25472,
1005
+ "82": 25472,
1006
+ "83": 25472,
1007
+ "84": 25472,
1008
+ "85": 25472,
1009
+ "86": 25472,
1010
+ "87": 25472,
1011
+ "88": 25472,
1012
+ "89": 25472,
1013
+ "9": 25472,
1014
+ "90": 25472,
1015
+ "91": 25472,
1016
+ "92": 13134,
1017
+ "93": 25472,
1018
+ "94": 25472,
1019
+ "95": 25472,
1020
+ "96": 25472,
1021
+ "97": 25472,
1022
+ "98": 25472,
1023
+ "99": 25472
1024
+ },
1025
+ "last_improvement_steps": 93,
1026
+ "last_increase_batch_size": 0,
1027
+ "last_increase_batch_size_eval_metric_improvement": 0,
1028
+ "last_increase_batch_size_steps": 0,
1029
+ "last_learning_rate_reduction": 0,
1030
+ "last_learning_rate_reduction_steps": 0,
1031
+ "learning_rate": 0.001,
1032
+ "llm_eval_examples": {},
1033
+ "num_increases_batch_size": 0,
1034
+ "num_reductions_learning_rate": 0,
1035
+ "steps": 465,
1036
+ "test_metrics": {
1037
+ "combined": {
1038
+ "loss": [
1039
+ [
1040
+ 1,
1041
+ 93,
1042
+ 0.23742298781871796
1043
+ ],
1044
+ [
1045
+ 2,
1046
+ 186,
1047
+ 0.21031303703784943
1048
+ ],
1049
+ [
1050
+ 3,
1051
+ 279,
1052
+ 0.12192762643098831
1053
+ ],
1054
+ [
1055
+ 4,
1056
+ 372,
1057
+ 0.07570453733205795
1058
+ ],
1059
+ [
1060
+ 5,
1061
+ 465,
1062
+ 0.05710742995142937
1063
+ ]
1064
+ ]
1065
+ },
1066
+ "language": {
1067
+ "accuracy": [
1068
+ [
1069
+ 1,
1070
+ 93,
1071
+ 0.924312949180603
1072
+ ],
1073
+ [
1074
+ 2,
1075
+ 186,
1076
+ 0.9528379440307617
1077
+ ],
1078
+ [
1079
+ 3,
1080
+ 279,
1081
+ 0.961493730545044
1082
+ ],
1083
+ [
1084
+ 4,
1085
+ 372,
1086
+ 0.9817298650741577
1087
+ ],
1088
+ [
1089
+ 5,
1090
+ 465,
1091
+ 0.9708698987960815
1092
+ ]
1093
+ ],
1094
+ "accuracy_micro": [
1095
+ [
1096
+ 1,
1097
+ 93,
1098
+ 0.9312320947647095
1099
+ ],
1100
+ [
1101
+ 2,
1102
+ 186,
1103
+ 0.9484240412712097
1104
+ ],
1105
+ [
1106
+ 3,
1107
+ 279,
1108
+ 0.9598853588104248
1109
+ ],
1110
+ [
1111
+ 4,
1112
+ 372,
1113
+ 0.9799426794052124
1114
+ ],
1115
+ [
1116
+ 5,
1117
+ 465,
1118
+ 0.9742120504379272
1119
+ ]
1120
+ ],
1121
+ "loss": [
1122
+ [
1123
+ 1,
1124
+ 93,
1125
+ 0.23742298781871796
1126
+ ],
1127
+ [
1128
+ 2,
1129
+ 186,
1130
+ 0.21031303703784943
1131
+ ],
1132
+ [
1133
+ 3,
1134
+ 279,
1135
+ 0.12192762643098831
1136
+ ],
1137
+ [
1138
+ 4,
1139
+ 372,
1140
+ 0.07570453733205795
1141
+ ],
1142
+ [
1143
+ 5,
1144
+ 465,
1145
+ 0.05710742995142937
1146
+ ]
1147
+ ],
1148
+ "roc_auc": [
1149
+ [
1150
+ 1,
1151
+ 93,
1152
+ 0.9985259175300598
1153
+ ],
1154
+ [
1155
+ 2,
1156
+ 186,
1157
+ 0.9976196885108948
1158
+ ],
1159
+ [
1160
+ 3,
1161
+ 279,
1162
+ 0.9982259273529053
1163
+ ],
1164
+ [
1165
+ 4,
1166
+ 372,
1167
+ 0.999114990234375
1168
+ ],
1169
+ [
1170
+ 5,
1171
+ 465,
1172
+ 0.9991633296012878
1173
+ ]
1174
+ ]
1175
+ }
1176
+ },
1177
+ "total_tokens_used": 11782790,
1178
+ "train_metrics": {
1179
+ "combined": {
1180
+ "loss": [
1181
+ [
1182
+ 1,
1183
+ 93,
1184
+ 2.6028196811676025
1185
+ ],
1186
+ [
1187
+ 2,
1188
+ 186,
1189
+ 0.05241914466023445
1190
+ ],
1191
+ [
1192
+ 3,
1193
+ 279,
1194
+ 0.1079530417919159
1195
+ ],
1196
+ [
1197
+ 4,
1198
+ 372,
1199
+ 0.03008267655968666
1200
+ ],
1201
+ [
1202
+ 5,
1203
+ 465,
1204
+ 0.020476186648011208
1205
+ ]
1206
+ ]
1207
+ },
1208
+ "language": {
1209
+ "accuracy": [
1210
+ [
1211
+ 1,
1212
+ 93,
1213
+ 0.709525465965271
1214
+ ],
1215
+ [
1216
+ 2,
1217
+ 186,
1218
+ 0.9832712411880493
1219
+ ],
1220
+ [
1221
+ 3,
1222
+ 279,
1223
+ 0.9843869209289551
1224
+ ],
1225
+ [
1226
+ 4,
1227
+ 372,
1228
+ 0.9913396835327148
1229
+ ],
1230
+ [
1231
+ 5,
1232
+ 465,
1233
+ 0.9936301708221436
1234
+ ]
1235
+ ],
1236
+ "accuracy_micro": [
1237
+ [
1238
+ 1,
1239
+ 93,
1240
+ 0.7127174735069275
1241
+ ],
1242
+ [
1243
+ 2,
1244
+ 186,
1245
+ 0.9831109642982483
1246
+ ],
1247
+ [
1248
+ 3,
1249
+ 279,
1250
+ 0.9853065609931946
1251
+ ],
1252
+ [
1253
+ 4,
1254
+ 372,
1255
+ 0.9910488128662109
1256
+ ],
1257
+ [
1258
+ 5,
1259
+ 465,
1260
+ 0.9930754899978638
1261
+ ]
1262
+ ],
1263
+ "loss": [
1264
+ [
1265
+ 1,
1266
+ 93,
1267
+ 2.6028196811676025
1268
+ ],
1269
+ [
1270
+ 2,
1271
+ 186,
1272
+ 0.05241914466023445
1273
+ ],
1274
+ [
1275
+ 3,
1276
+ 279,
1277
+ 0.1079530417919159
1278
+ ],
1279
+ [
1280
+ 4,
1281
+ 372,
1282
+ 0.03008267655968666
1283
+ ],
1284
+ [
1285
+ 5,
1286
+ 465,
1287
+ 0.020476186648011208
1288
+ ]
1289
+ ],
1290
+ "roc_auc": [
1291
+ [
1292
+ 1,
1293
+ 93,
1294
+ 0.8537933826446533
1295
+ ],
1296
+ [
1297
+ 2,
1298
+ 186,
1299
+ 0.9993903636932373
1300
+ ],
1301
+ [
1302
+ 3,
1303
+ 279,
1304
+ 0.9988884925842285
1305
+ ],
1306
+ [
1307
+ 4,
1308
+ 372,
1309
+ 0.9997660517692566
1310
+ ],
1311
+ [
1312
+ 5,
1313
+ 465,
1314
+ 0.9998530745506287
1315
+ ]
1316
+ ]
1317
+ }
1318
+ },
1319
+ "tune_checkpoint_num": 0,
1320
+ "validation_metrics": {
1321
+ "combined": {
1322
+ "loss": [
1323
+ [
1324
+ 1,
1325
+ 93,
1326
+ 0.2647719979286194
1327
+ ],
1328
+ [
1329
+ 2,
1330
+ 186,
1331
+ 0.27919644117355347
1332
+ ],
1333
+ [
1334
+ 3,
1335
+ 279,
1336
+ 0.1389603614807129
1337
+ ],
1338
+ [
1339
+ 4,
1340
+ 372,
1341
+ 0.06920339167118073
1342
+ ],
1343
+ [
1344
+ 5,
1345
+ 465,
1346
+ 0.07529747486114502
1347
+ ]
1348
+ ]
1349
+ },
1350
+ "language": {
1351
+ "accuracy": [
1352
+ [
1353
+ 1,
1354
+ 93,
1355
+ 0.911348819732666
1356
+ ],
1357
+ [
1358
+ 2,
1359
+ 186,
1360
+ 0.9450980424880981
1361
+ ],
1362
+ [
1363
+ 3,
1364
+ 279,
1365
+ 0.9551990628242493
1366
+ ],
1367
+ [
1368
+ 4,
1369
+ 372,
1370
+ 0.9799168109893799
1371
+ ],
1372
+ [
1373
+ 5,
1374
+ 465,
1375
+ 0.9636363983154297
1376
+ ]
1377
+ ],
1378
+ "accuracy_micro": [
1379
+ [
1380
+ 1,
1381
+ 93,
1382
+ 0.9225251078605652
1383
+ ],
1384
+ [
1385
+ 2,
1386
+ 186,
1387
+ 0.9397417306900024
1388
+ ],
1389
+ [
1390
+ 3,
1391
+ 279,
1392
+ 0.955523669719696
1393
+ ],
1394
+ [
1395
+ 4,
1396
+ 372,
1397
+ 0.9784792065620422
1398
+ ],
1399
+ [
1400
+ 5,
1401
+ 465,
1402
+ 0.9684361815452576
1403
+ ]
1404
+ ],
1405
+ "loss": [
1406
+ [
1407
+ 1,
1408
+ 93,
1409
+ 0.2647719979286194
1410
+ ],
1411
+ [
1412
+ 2,
1413
+ 186,
1414
+ 0.27919644117355347
1415
+ ],
1416
+ [
1417
+ 3,
1418
+ 279,
1419
+ 0.1389603614807129
1420
+ ],
1421
+ [
1422
+ 4,
1423
+ 372,
1424
+ 0.06920339167118073
1425
+ ],
1426
+ [
1427
+ 5,
1428
+ 465,
1429
+ 0.07529747486114502
1430
+ ]
1431
+ ],
1432
+ "roc_auc": [
1433
+ [
1434
+ 1,
1435
+ 93,
1436
+ 0.9981002807617188
1437
+ ],
1438
+ [
1439
+ 2,
1440
+ 186,
1441
+ 0.99806147813797
1442
+ ],
1443
+ [
1444
+ 3,
1445
+ 279,
1446
+ 0.9984046816825867
1447
+ ],
1448
+ [
1449
+ 4,
1450
+ 372,
1451
+ 0.998852550983429
1452
+ ],
1453
+ [
1454
+ 5,
1455
+ 465,
1456
+ 0.9990896582603455
1457
+ ]
1458
+ ]
1459
+ }
1460
+ }
1461
+ }
model/training_set_metadata.json ADDED
The diff for this file is too large to render. See raw diff
 
training_statistics.json ADDED
@@ -0,0 +1,129 @@
1
+ {
2
+ "evaluation_frequency": {
3
+ "frequency": 1,
4
+ "period": "epoch"
5
+ },
6
+ "test": {
7
+ "combined": {
8
+ "loss": [
9
+ 0.23742298781871796,
10
+ 0.21031303703784943,
11
+ 0.12192762643098831,
12
+ 0.07570453733205795,
13
+ 0.05710742995142937
14
+ ]
15
+ },
16
+ "language": {
17
+ "accuracy": [
18
+ 0.924312949180603,
19
+ 0.9528379440307617,
20
+ 0.961493730545044,
21
+ 0.9817298650741577,
22
+ 0.9708698987960815
23
+ ],
24
+ "accuracy_micro": [
25
+ 0.9312320947647095,
26
+ 0.9484240412712097,
27
+ 0.9598853588104248,
28
+ 0.9799426794052124,
29
+ 0.9742120504379272
30
+ ],
31
+ "loss": [
32
+ 0.23742298781871796,
33
+ 0.21031303703784943,
34
+ 0.12192762643098831,
35
+ 0.07570453733205795,
36
+ 0.05710742995142937
37
+ ],
38
+ "roc_auc": [
39
+ 0.9985259175300598,
40
+ 0.9976196885108948,
41
+ 0.9982259273529053,
42
+ 0.999114990234375,
43
+ 0.9991633296012878
44
+ ]
45
+ }
46
+ },
47
+ "training": {
48
+ "combined": {
49
+ "loss": [
50
+ 2.6028196811676025,
51
+ 0.05241914466023445,
52
+ 0.1079530417919159,
53
+ 0.03008267655968666,
54
+ 0.020476186648011208
55
+ ]
56
+ },
57
+ "language": {
58
+ "accuracy": [
59
+ 0.709525465965271,
60
+ 0.9832712411880493,
61
+ 0.9843869209289551,
62
+ 0.9913396835327148,
63
+ 0.9936301708221436
64
+ ],
65
+ "accuracy_micro": [
66
+ 0.7127174735069275,
67
+ 0.9831109642982483,
68
+ 0.9853065609931946,
69
+ 0.9910488128662109,
70
+ 0.9930754899978638
71
+ ],
72
+ "loss": [
73
+ 2.6028196811676025,
74
+ 0.05241914466023445,
75
+ 0.1079530417919159,
76
+ 0.03008267655968666,
77
+ 0.020476186648011208
78
+ ],
79
+ "roc_auc": [
80
+ 0.8537933826446533,
81
+ 0.9993903636932373,
82
+ 0.9988884925842285,
83
+ 0.9997660517692566,
84
+ 0.9998530745506287
85
+ ]
86
+ }
87
+ },
88
+ "validation": {
89
+ "combined": {
90
+ "loss": [
91
+ 0.2647719979286194,
92
+ 0.27919644117355347,
93
+ 0.1389603614807129,
94
+ 0.06920339167118073,
95
+ 0.07529747486114502
96
+ ]
97
+ },
98
+ "language": {
99
+ "accuracy": [
100
+ 0.911348819732666,
101
+ 0.9450980424880981,
102
+ 0.9551990628242493,
103
+ 0.9799168109893799,
104
+ 0.9636363983154297
105
+ ],
106
+ "accuracy_micro": [
107
+ 0.9225251078605652,
108
+ 0.9397417306900024,
109
+ 0.955523669719696,
110
+ 0.9784792065620422,
111
+ 0.9684361815452576
112
+ ],
113
+ "loss": [
114
+ 0.2647719979286194,
115
+ 0.27919644117355347,
116
+ 0.1389603614807129,
117
+ 0.06920339167118073,
118
+ 0.07529747486114502
119
+ ],
120
+ "roc_auc": [
121
+ 0.9981002807617188,
122
+ 0.99806147813797,
123
+ 0.9984046816825867,
124
+ 0.998852550983429,
125
+ 0.9990896582603455
126
+ ]
127
+ }
128
+ }
129
+ }
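training_statistics.json stores one entry per evaluation for each split, and evaluation ran once per epoch, so each list index corresponds to an epoch. A minimal sketch (standard library only, file name as committed) that prints the per-epoch validation accuracy and loss for the language output:

import json

with open("training_statistics.json") as f:
    stats = json.load(f)

# One value per epoch: evaluation_frequency is 1 per epoch, 5 epochs in this run.
val_acc = stats["validation"]["language"]["accuracy"]
val_loss = stats["validation"]["combined"]["loss"]
for epoch, (acc, loss) in enumerate(zip(val_acc, val_loss), start=1):
    print("epoch {}: val_accuracy={:.4f} val_loss={:.4f}".format(epoch, acc, loss))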