jnmrr committed on
Commit
eec1cd1
·
verified ·
1 Parent(s): 37e4136

Upload model - 2024-12-12 01:23

Browse files
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - image-classification
4
+ - document-classification
5
+ - vision
6
+ library_name: transformers
7
+ pipeline_tag: image-classification
8
+ license: mit
9
+ ---
10
+
11
+ # Document Classification Model
12
+
13
+ ## Overview
14
+ This model is a Document Image Transformer (DiT) fine-tuned from `microsoft/dit-large` to classify document page images into 41 document types.
15
+
16
+ ## Model Details
17
+ * Architecture: Document Image Transformer (DiT), a BEiT-style vision transformer loaded as `BeitForImageClassification`
18
+ * Tasks: Document Classification
19
+ * Training Framework: 🤗 Transformers
20
+ * Base Model: microsoft/dit-large
21
+ * Training Dataset Size: 32786
22
+
23
+ ## Training Parameters
24
+ * Batch Size: 256
25
+ * Learning Rate: 0.002
26
+ * Number of Epochs: 1
27
+ * Mixed Precision: BF16
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
33
+ from PIL import Image
34
+
35
+ # Load model and processor
36
+ processor = AutoImageProcessor.from_pretrained("jnmrr/ds3-img-classification")
37
+ model = AutoModelForImageClassification.from_pretrained("jnmrr/ds3-img-classification")
38
+
39
+ # Process an image
40
+ image = Image.open("document.png")
41
+ inputs = processor(image, return_tensors="pt")
42
+
43
+ # Make prediction
44
+ outputs = model(**inputs)
45
+ predicted_label = outputs.logits.argmax(-1).item()
46
+ ```
all_results.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9922480620155039,
3
+ "eval_accuracy": 0.15015097447158934,
4
+ "eval_loss": 4.886590003967285,
5
+ "eval_macro_f1": 0.033358390849091486,
6
+ "eval_macro_precision": 0.039403090337421884,
7
+ "eval_macro_recall": 0.07102345263438722,
8
+ "eval_micro_f1": 0.15015097447158934,
9
+ "eval_micro_precision": 0.15015097447158934,
10
+ "eval_micro_recall": 0.15015097447158934,
11
+ "eval_runtime": 10.4546,
12
+ "eval_samples_per_second": 348.46,
13
+ "eval_steps_per_second": 0.765,
14
+ "eval_weighted_f1": 0.054572202456524575,
15
+ "eval_weighted_precision": 0.06415751425956755,
16
+ "eval_weighted_recall": 0.15015097447158934,
17
+ "total_flos": 8.978215898519175e+18,
18
+ "train_loss": 4.798892915248871,
19
+ "train_runtime": 164.8721,
20
+ "train_samples_per_second": 198.857,
21
+ "train_steps_per_second": 0.388
22
+ }
checkpoint-35/config.json ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/dit-large",
3
+ "add_fpn": false,
4
+ "architectures": [
5
+ "BeitForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "auxiliary_channels": 256,
9
+ "auxiliary_concat_input": false,
10
+ "auxiliary_loss_weight": 0.4,
11
+ "auxiliary_num_convs": 1,
12
+ "drop_path_rate": 0.1,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0.0,
15
+ "hidden_size": 1024,
16
+ "id2label": {
17
+ "0": "informe_social",
18
+ "1": "cargo_ingreso_mpe",
19
+ "2": "acta_registro_personal",
20
+ "3": "citacion_pnp",
21
+ "4": "acta_lectura_derechos_ley30364",
22
+ "5": "acta_intervencion_minpu",
23
+ "6": "denuncia_policial",
24
+ "7": "providencia_fiscal",
25
+ "8": "consulta_medidas_proteccion",
26
+ "9": "oficio_medicina_legal",
27
+ "10": "oficio_evaluacion_psicologica",
28
+ "11": "resolucion_judicial_audiencia",
29
+ "12": "certificado_medico_legal",
30
+ "13": "ficha_datos_sidpol",
31
+ "14": "consulta_sucamec",
32
+ "15": "informe_mimp",
33
+ "16": "ficha_valoracion_riesgo_manual",
34
+ "17": "oficio_atencion_integral",
35
+ "18": "escrito_minpu",
36
+ "19": "informe_psicologico",
37
+ "20": "ficha_valoracion_riesgo_digital",
38
+ "21": "informe_medico",
39
+ "22": "croquis_domicilio",
40
+ "23": "ficha_datos_reniec",
41
+ "24": "escrito_pj",
42
+ "25": "evidencia_chat",
43
+ "26": "declaracion_minpu",
44
+ "27": "notificacion_resolucion_judicial_audiencia",
45
+ "28": "consulta_antecedentes_penales",
46
+ "29": "notificacion_detencion_pnp",
47
+ "30": "consulta_personas_sidpol",
48
+ "31": "escrito_pnp",
49
+ "32": "constancia_buen_trato",
50
+ "33": "declaracion_pnp",
51
+ "34": "escrito_mimp",
52
+ "35": "acta_intervencion_pnp",
53
+ "36": "constancia_notificacion",
54
+ "37": "cargo_ingreso_sij",
55
+ "38": "anexo_dni",
56
+ "39": "hoja_blanco",
57
+ "40": "consulta_requisitorias"
58
+ },
59
+ "image_size": 224,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 4096,
62
+ "label2id": {
63
+ "acta_intervencion_minpu": 5,
64
+ "acta_intervencion_pnp": 35,
65
+ "acta_lectura_derechos_ley30364": 4,
66
+ "acta_registro_personal": 2,
67
+ "anexo_dni": 38,
68
+ "cargo_ingreso_mpe": 1,
69
+ "cargo_ingreso_sij": 37,
70
+ "certificado_medico_legal": 12,
71
+ "citacion_pnp": 3,
72
+ "constancia_buen_trato": 32,
73
+ "constancia_notificacion": 36,
74
+ "consulta_antecedentes_penales": 28,
75
+ "consulta_medidas_proteccion": 8,
76
+ "consulta_personas_sidpol": 30,
77
+ "consulta_requisitorias": 40,
78
+ "consulta_sucamec": 14,
79
+ "croquis_domicilio": 22,
80
+ "declaracion_minpu": 26,
81
+ "declaracion_pnp": 33,
82
+ "denuncia_policial": 6,
83
+ "escrito_mimp": 34,
84
+ "escrito_minpu": 18,
85
+ "escrito_pj": 24,
86
+ "escrito_pnp": 31,
87
+ "evidencia_chat": 25,
88
+ "ficha_datos_reniec": 23,
89
+ "ficha_datos_sidpol": 13,
90
+ "ficha_valoracion_riesgo_digital": 20,
91
+ "ficha_valoracion_riesgo_manual": 16,
92
+ "hoja_blanco": 39,
93
+ "informe_medico": 21,
94
+ "informe_mimp": 15,
95
+ "informe_psicologico": 19,
96
+ "informe_social": 0,
97
+ "notificacion_detencion_pnp": 29,
98
+ "notificacion_resolucion_judicial_audiencia": 27,
99
+ "oficio_atencion_integral": 17,
100
+ "oficio_evaluacion_psicologica": 10,
101
+ "oficio_medicina_legal": 9,
102
+ "providencia_fiscal": 7,
103
+ "resolucion_judicial_audiencia": 11
104
+ },
105
+ "layer_norm_eps": 1e-12,
106
+ "layer_scale_init_value": 0.1,
107
+ "model_type": "beit",
108
+ "num_attention_heads": 16,
109
+ "num_channels": 3,
110
+ "num_hidden_layers": 24,
111
+ "out_features": [
112
+ "stage3",
113
+ "stage5",
114
+ "stage7",
115
+ "stage11"
116
+ ],
117
+ "out_indices": [
118
+ 3,
119
+ 5,
120
+ 7,
121
+ 11
122
+ ],
123
+ "patch_size": 16,
124
+ "pool_scales": [
125
+ 1,
126
+ 2,
127
+ 3,
128
+ 6
129
+ ],
130
+ "problem_type": "single_label_classification",
131
+ "reshape_hidden_states": true,
132
+ "semantic_loss_ignore_index": 255,
133
+ "stage_names": [
134
+ "stem",
135
+ "stage1",
136
+ "stage2",
137
+ "stage3",
138
+ "stage4",
139
+ "stage5",
140
+ "stage6",
141
+ "stage7",
142
+ "stage8",
143
+ "stage9",
144
+ "stage10",
145
+ "stage11",
146
+ "stage12",
147
+ "stage13",
148
+ "stage14",
149
+ "stage15",
150
+ "stage16",
151
+ "stage17",
152
+ "stage18",
153
+ "stage19",
154
+ "stage20",
155
+ "stage21",
156
+ "stage22",
157
+ "stage23",
158
+ "stage24"
159
+ ],
160
+ "torch_dtype": "float32",
161
+ "transformers_version": "4.46.3",
162
+ "use_absolute_position_embeddings": true,
163
+ "use_auxiliary_head": true,
164
+ "use_mask_token": true,
165
+ "use_mean_pooling": true,
166
+ "use_relative_position_bias": false,
167
+ "use_shared_relative_position_bias": false,
168
+ "vocab_size": 8192
169
+ }
checkpoint-35/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842438b2a71dff9bd8051dc5cd455a2ffda7d106f391a8ddd42424b10a27ec96
3
+ size 1213526036
checkpoint-35/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": false,
7
+ "do_normalize": true,
8
+ "do_reduce_labels": false,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "image_processor_type": "BeitImageProcessor",
17
+ "image_std": [
18
+ 0.5,
19
+ 0.5,
20
+ 0.5
21
+ ],
22
+ "resample": 2,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "height": 224,
26
+ "width": 224
27
+ }
28
+ }
checkpoint-35/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a16312646f5c5640cf4b41e6af735a1c4608d535ed5abf8c5704b217a771e71a
3
+ size 14244
checkpoint-35/trainer_state.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.13999451001921492,
3
+ "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
+ "epoch": 0.5426356589147286,
5
+ "eval_steps": 35,
6
+ "global_step": 35,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.015503875968992248,
13
+ "grad_norm": 12.836527824401855,
14
+ "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.7815,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.5426356589147286,
20
+ "grad_norm": 6.243555545806885,
21
+ "learning_rate": 0.0010275543423681622,
22
+ "loss": 4.9108,
23
+ "step": 35
24
+ },
25
+ {
26
+ "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13999451001921492,
28
+ "eval_loss": 5.21905517578125,
29
+ "eval_macro_f1": 0.04648934733573823,
30
+ "eval_macro_precision": 0.03716366453858833,
31
+ "eval_macro_recall": 0.09867228278248098,
32
+ "eval_micro_f1": 0.13999451001921492,
33
+ "eval_micro_precision": 0.13999451001921492,
34
+ "eval_micro_recall": 0.13999451001921492,
35
+ "eval_runtime": 8.2153,
36
+ "eval_samples_per_second": 443.44,
37
+ "eval_steps_per_second": 3.53,
38
+ "eval_weighted_f1": 0.0724726180615966,
39
+ "eval_weighted_precision": 0.06178867753876749,
40
+ "eval_weighted_recall": 0.13999451001921492,
41
+ "step": 35
42
+ }
43
+ ],
44
+ "logging_steps": 35,
45
+ "max_steps": 64,
46
+ "num_input_tokens_seen": 0,
47
+ "num_train_epochs": 1,
48
+ "save_steps": 35,
49
+ "stateful_callbacks": {
50
+ "TrainerControl": {
51
+ "args": {
52
+ "should_epoch_stop": false,
53
+ "should_evaluate": false,
54
+ "should_log": false,
55
+ "should_save": true,
56
+ "should_training_stop": false
57
+ },
58
+ "attributes": {}
59
+ }
60
+ },
61
+ "total_flos": 4.909961819502674e+18,
62
+ "train_batch_size": 256,
63
+ "trial_name": null,
64
+ "trial_params": null
65
+ }
checkpoint-64/config.json ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/dit-large",
3
+ "add_fpn": false,
4
+ "architectures": [
5
+ "BeitForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "auxiliary_channels": 256,
9
+ "auxiliary_concat_input": false,
10
+ "auxiliary_loss_weight": 0.4,
11
+ "auxiliary_num_convs": 1,
12
+ "drop_path_rate": 0.1,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0.0,
15
+ "hidden_size": 1024,
16
+ "id2label": {
17
+ "0": "informe_social",
18
+ "1": "cargo_ingreso_mpe",
19
+ "2": "acta_registro_personal",
20
+ "3": "citacion_pnp",
21
+ "4": "acta_lectura_derechos_ley30364",
22
+ "5": "acta_intervencion_minpu",
23
+ "6": "denuncia_policial",
24
+ "7": "providencia_fiscal",
25
+ "8": "consulta_medidas_proteccion",
26
+ "9": "oficio_medicina_legal",
27
+ "10": "oficio_evaluacion_psicologica",
28
+ "11": "resolucion_judicial_audiencia",
29
+ "12": "certificado_medico_legal",
30
+ "13": "ficha_datos_sidpol",
31
+ "14": "consulta_sucamec",
32
+ "15": "informe_mimp",
33
+ "16": "ficha_valoracion_riesgo_manual",
34
+ "17": "oficio_atencion_integral",
35
+ "18": "escrito_minpu",
36
+ "19": "informe_psicologico",
37
+ "20": "ficha_valoracion_riesgo_digital",
38
+ "21": "informe_medico",
39
+ "22": "croquis_domicilio",
40
+ "23": "ficha_datos_reniec",
41
+ "24": "escrito_pj",
42
+ "25": "evidencia_chat",
43
+ "26": "declaracion_minpu",
44
+ "27": "notificacion_resolucion_judicial_audiencia",
45
+ "28": "consulta_antecedentes_penales",
46
+ "29": "notificacion_detencion_pnp",
47
+ "30": "consulta_personas_sidpol",
48
+ "31": "escrito_pnp",
49
+ "32": "constancia_buen_trato",
50
+ "33": "declaracion_pnp",
51
+ "34": "escrito_mimp",
52
+ "35": "acta_intervencion_pnp",
53
+ "36": "constancia_notificacion",
54
+ "37": "cargo_ingreso_sij",
55
+ "38": "anexo_dni",
56
+ "39": "hoja_blanco",
57
+ "40": "consulta_requisitorias"
58
+ },
59
+ "image_size": 224,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 4096,
62
+ "label2id": {
63
+ "acta_intervencion_minpu": 5,
64
+ "acta_intervencion_pnp": 35,
65
+ "acta_lectura_derechos_ley30364": 4,
66
+ "acta_registro_personal": 2,
67
+ "anexo_dni": 38,
68
+ "cargo_ingreso_mpe": 1,
69
+ "cargo_ingreso_sij": 37,
70
+ "certificado_medico_legal": 12,
71
+ "citacion_pnp": 3,
72
+ "constancia_buen_trato": 32,
73
+ "constancia_notificacion": 36,
74
+ "consulta_antecedentes_penales": 28,
75
+ "consulta_medidas_proteccion": 8,
76
+ "consulta_personas_sidpol": 30,
77
+ "consulta_requisitorias": 40,
78
+ "consulta_sucamec": 14,
79
+ "croquis_domicilio": 22,
80
+ "declaracion_minpu": 26,
81
+ "declaracion_pnp": 33,
82
+ "denuncia_policial": 6,
83
+ "escrito_mimp": 34,
84
+ "escrito_minpu": 18,
85
+ "escrito_pj": 24,
86
+ "escrito_pnp": 31,
87
+ "evidencia_chat": 25,
88
+ "ficha_datos_reniec": 23,
89
+ "ficha_datos_sidpol": 13,
90
+ "ficha_valoracion_riesgo_digital": 20,
91
+ "ficha_valoracion_riesgo_manual": 16,
92
+ "hoja_blanco": 39,
93
+ "informe_medico": 21,
94
+ "informe_mimp": 15,
95
+ "informe_psicologico": 19,
96
+ "informe_social": 0,
97
+ "notificacion_detencion_pnp": 29,
98
+ "notificacion_resolucion_judicial_audiencia": 27,
99
+ "oficio_atencion_integral": 17,
100
+ "oficio_evaluacion_psicologica": 10,
101
+ "oficio_medicina_legal": 9,
102
+ "providencia_fiscal": 7,
103
+ "resolucion_judicial_audiencia": 11
104
+ },
105
+ "layer_norm_eps": 1e-12,
106
+ "layer_scale_init_value": 0.1,
107
+ "model_type": "beit",
108
+ "num_attention_heads": 16,
109
+ "num_channels": 3,
110
+ "num_hidden_layers": 24,
111
+ "out_features": [
112
+ "stage3",
113
+ "stage5",
114
+ "stage7",
115
+ "stage11"
116
+ ],
117
+ "out_indices": [
118
+ 3,
119
+ 5,
120
+ 7,
121
+ 11
122
+ ],
123
+ "patch_size": 16,
124
+ "pool_scales": [
125
+ 1,
126
+ 2,
127
+ 3,
128
+ 6
129
+ ],
130
+ "problem_type": "single_label_classification",
131
+ "reshape_hidden_states": true,
132
+ "semantic_loss_ignore_index": 255,
133
+ "stage_names": [
134
+ "stem",
135
+ "stage1",
136
+ "stage2",
137
+ "stage3",
138
+ "stage4",
139
+ "stage5",
140
+ "stage6",
141
+ "stage7",
142
+ "stage8",
143
+ "stage9",
144
+ "stage10",
145
+ "stage11",
146
+ "stage12",
147
+ "stage13",
148
+ "stage14",
149
+ "stage15",
150
+ "stage16",
151
+ "stage17",
152
+ "stage18",
153
+ "stage19",
154
+ "stage20",
155
+ "stage21",
156
+ "stage22",
157
+ "stage23",
158
+ "stage24"
159
+ ],
160
+ "torch_dtype": "float32",
161
+ "transformers_version": "4.46.3",
162
+ "use_absolute_position_embeddings": true,
163
+ "use_auxiliary_head": true,
164
+ "use_mask_token": true,
165
+ "use_mean_pooling": true,
166
+ "use_relative_position_bias": false,
167
+ "use_shared_relative_position_bias": false,
168
+ "vocab_size": 8192
169
+ }
checkpoint-64/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95a5acea4810d62c8e91883a28ffee70ed6514bb963bcbeff5fd3d711ff1f10
3
+ size 1213526036
checkpoint-64/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": false,
7
+ "do_normalize": true,
8
+ "do_reduce_labels": false,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "image_processor_type": "BeitImageProcessor",
17
+ "image_std": [
18
+ 0.5,
19
+ 0.5,
20
+ 0.5
21
+ ],
22
+ "resample": 2,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "height": 224,
26
+ "width": 224
27
+ }
28
+ }
checkpoint-64/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd323ff31f4540314edfc5f0517b90e0430bc578b03d774b83916b841baca9c
3
+ size 14244
checkpoint-64/trainer_state.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.13999451001921492,
3
+ "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
+ "epoch": 0.9922480620155039,
5
+ "eval_steps": 35,
6
+ "global_step": 64,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.015503875968992248,
13
+ "grad_norm": 12.836527824401855,
14
+ "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.7815,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.5426356589147286,
20
+ "grad_norm": 6.243555545806885,
21
+ "learning_rate": 0.0010275543423681622,
22
+ "loss": 4.9108,
23
+ "step": 35
24
+ },
25
+ {
26
+ "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13999451001921492,
28
+ "eval_loss": 5.21905517578125,
29
+ "eval_macro_f1": 0.04648934733573823,
30
+ "eval_macro_precision": 0.03716366453858833,
31
+ "eval_macro_recall": 0.09867228278248098,
32
+ "eval_micro_f1": 0.13999451001921492,
33
+ "eval_micro_precision": 0.13999451001921492,
34
+ "eval_micro_recall": 0.13999451001921492,
35
+ "eval_runtime": 8.2153,
36
+ "eval_samples_per_second": 443.44,
37
+ "eval_steps_per_second": 3.53,
38
+ "eval_weighted_f1": 0.0724726180615966,
39
+ "eval_weighted_precision": 0.06178867753876749,
40
+ "eval_weighted_recall": 0.13999451001921492,
41
+ "step": 35
42
+ }
43
+ ],
44
+ "logging_steps": 35,
45
+ "max_steps": 64,
46
+ "num_input_tokens_seen": 0,
47
+ "num_train_epochs": 1,
48
+ "save_steps": 35,
49
+ "stateful_callbacks": {
50
+ "TrainerControl": {
51
+ "args": {
52
+ "should_epoch_stop": false,
53
+ "should_evaluate": false,
54
+ "should_log": false,
55
+ "should_save": true,
56
+ "should_training_stop": true
57
+ },
58
+ "attributes": {}
59
+ }
60
+ },
61
+ "total_flos": 8.978215898519175e+18,
62
+ "train_batch_size": 256,
63
+ "trial_name": null,
64
+ "trial_params": null
65
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.15015097447158934,
3
+ "eval_loss": 4.886590003967285,
4
+ "eval_macro_f1": 0.033358390849091486,
5
+ "eval_macro_precision": 0.039403090337421884,
6
+ "eval_macro_recall": 0.07102345263438722,
7
+ "eval_micro_f1": 0.15015097447158934,
8
+ "eval_micro_precision": 0.15015097447158934,
9
+ "eval_micro_recall": 0.15015097447158934,
10
+ "eval_runtime": 10.4546,
11
+ "eval_samples_per_second": 348.46,
12
+ "eval_steps_per_second": 0.765,
13
+ "eval_weighted_f1": 0.054572202456524575,
14
+ "eval_weighted_precision": 0.06415751425956755,
15
+ "eval_weighted_recall": 0.15015097447158934
16
+ }
metrics.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_runtime": 163.0123,
3
+ "train_samples_per_second": 201.126,
4
+ "train_steps_per_second": 0.393,
5
+ "total_flos": 8.978215898519175e+18,
6
+ "train_loss": 4.277425870299339,
7
+ "epoch": 0.9922480620155039
8
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d4ba2f71de80d1f45315dc0f68b08a4f8ef93be9c2b07beec6073786df357e9
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842438b2a71dff9bd8051dc5cd455a2ffda7d106f391a8ddd42424b10a27ec96
3
  size 1213526036
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9922480620155039,
3
+ "total_flos": 8.978215898519175e+18,
4
+ "train_loss": 4.798892915248871,
5
+ "train_runtime": 164.8721,
6
+ "train_samples_per_second": 198.857,
7
+ "train_steps_per_second": 0.388
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.13999451001921492,
3
+ "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
+ "epoch": 0.9922480620155039,
5
+ "eval_steps": 35,
6
+ "global_step": 64,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.015503875968992248,
13
+ "grad_norm": 12.836527824401855,
14
+ "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.7815,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.5426356589147286,
20
+ "grad_norm": 6.243555545806885,
21
+ "learning_rate": 0.0010275543423681622,
22
+ "loss": 4.9108,
23
+ "step": 35
24
+ },
25
+ {
26
+ "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13999451001921492,
28
+ "eval_loss": 5.21905517578125,
29
+ "eval_macro_f1": 0.04648934733573823,
30
+ "eval_macro_precision": 0.03716366453858833,
31
+ "eval_macro_recall": 0.09867228278248098,
32
+ "eval_micro_f1": 0.13999451001921492,
33
+ "eval_micro_precision": 0.13999451001921492,
34
+ "eval_micro_recall": 0.13999451001921492,
35
+ "eval_runtime": 8.2153,
36
+ "eval_samples_per_second": 443.44,
37
+ "eval_steps_per_second": 3.53,
38
+ "eval_weighted_f1": 0.0724726180615966,
39
+ "eval_weighted_precision": 0.06178867753876749,
40
+ "eval_weighted_recall": 0.13999451001921492,
41
+ "step": 35
42
+ },
43
+ {
44
+ "epoch": 0.9922480620155039,
45
+ "step": 64,
46
+ "total_flos": 8.978215898519175e+18,
47
+ "train_loss": 4.277425870299339,
48
+ "train_runtime": 163.0123,
49
+ "train_samples_per_second": 201.126,
50
+ "train_steps_per_second": 0.393
51
+ }
52
+ ],
53
+ "logging_steps": 35,
54
+ "max_steps": 64,
55
+ "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 1,
57
+ "save_steps": 35,
58
+ "stateful_callbacks": {
59
+ "TrainerControl": {
60
+ "args": {
61
+ "should_epoch_stop": false,
62
+ "should_evaluate": false,
63
+ "should_log": false,
64
+ "should_save": true,
65
+ "should_training_stop": true
66
+ },
67
+ "attributes": {}
68
+ }
69
+ },
70
+ "total_flos": 8.978215898519175e+18,
71
+ "train_batch_size": 256,
72
+ "trial_name": null,
73
+ "trial_params": null
74
+ }
training_metrics.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "train_runtime": 163.1429,
4
+ "train_samples_per_second": 200.965,
5
+ "train_steps_per_second": 0.392,
6
+ "total_flos": 8.978215898519175e+18,
7
+ "train_loss": 5.019116312265396,
8
+ "epoch": 0.9922480620155039
9
+ },
10
+ "eval": {
11
+ "eval_loss": 4.839980125427246,
12
+ "eval_accuracy": 0.13889651386220148,
13
+ "eval_weighted_f1": 0.049892274188486484,
14
+ "eval_micro_f1": 0.13889651386220148,
15
+ "eval_macro_f1": 0.027543503036345927,
16
+ "eval_weighted_recall": 0.13889651386220148,
17
+ "eval_micro_recall": 0.13889651386220148,
18
+ "eval_macro_recall": 0.05869405594955945,
19
+ "eval_weighted_precision": 0.031947616026715114,
20
+ "eval_micro_precision": 0.13889651386220148,
21
+ "eval_macro_precision": 0.018972609174899638,
22
+ "eval_runtime": 10.6093,
23
+ "eval_samples_per_second": 343.379,
24
+ "eval_steps_per_second": 0.754
25
+ }
26
+ }