jnmrr committed on Dec 12, 2024

Commit

eec1cd1

verified ·

1 Parent(s): 37e4136

Upload model - 2024-12-12 01:23

Browse files

Files changed (18) hide show

README.md +46 -0
all_results.json +22 -0
checkpoint-35/config.json +169 -0
checkpoint-35/model.safetensors +3 -0
checkpoint-35/preprocessor_config.json +28 -0
checkpoint-35/rng_state.pth +3 -0
checkpoint-35/trainer_state.json +65 -0
checkpoint-64/config.json +169 -0
checkpoint-64/model.safetensors +3 -0
checkpoint-64/preprocessor_config.json +28 -0
checkpoint-64/rng_state.pth +3 -0
checkpoint-64/trainer_state.json +65 -0
eval_results.json +16 -0
metrics.json +8 -0
model.safetensors +1 -1
train_results.json +8 -0
trainer_state.json +74 -0
training_metrics.json +26 -0

README.md ADDED Viewed

	@@ -0,0 +1,46 @@

+---
+tags:
+- image-classification
+- document-classification
+- vision
+library_name: transformers
+pipeline_tag: image-classification
+license: mit
+---
+# Document Classification Model
+## Overview
+This model is a Document Image Transformer (DiT), fine-tuned from `microsoft/dit-large`, that classifies document page images into one of 41 document types.
+## Model Details
+* Architecture: Document Image Transformer (DiT), a BEiT-style vision transformer loaded as `BeitForImageClassification`
+* Tasks: Document Classification
+* Training Framework: 🤗 Transformers
+* Base Model: microsoft/dit-large
+* Training Dataset Size: 32786
+## Training Parameters
+* Batch Size: 256
+* Learning Rate: 0.002
+* Number of Epochs: 1
+* Mixed Precision: BF16
+## Usage
+```python
+from transformers import AutoImageProcessor, AutoModelForImageClassification
+from PIL import Image
+# Load model and processor
+processor = AutoImageProcessor.from_pretrained("jnmrr/ds3-img-classification")
+model = AutoModelForImageClassification.from_pretrained("jnmrr/ds3-img-classification")
+# Process an image
+image = Image.open("document.png")
+inputs = processor(image, return_tensors="pt")
+# Make prediction (argmax yields the integer class id; look up its name with model.config.id2label[predicted_label])
+outputs = model(**inputs)
+predicted_label = outputs.logits.argmax(-1).item()
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+    "epoch": 0.9922480620155039,
+    "eval_accuracy": 0.15015097447158934,
+    "eval_loss": 4.886590003967285,
+    "eval_macro_f1": 0.033358390849091486,
+    "eval_macro_precision": 0.039403090337421884,
+    "eval_macro_recall": 0.07102345263438722,
+    "eval_micro_f1": 0.15015097447158934,
+    "eval_micro_precision": 0.15015097447158934,
+    "eval_micro_recall": 0.15015097447158934,
+    "eval_runtime": 10.4546,
+    "eval_samples_per_second": 348.46,
+    "eval_steps_per_second": 0.765,
+    "eval_weighted_f1": 0.054572202456524575,
+    "eval_weighted_precision": 0.06415751425956755,
+    "eval_weighted_recall": 0.15015097447158934,
+    "total_flos": 8.978215898519175e+18,
+    "train_loss": 4.798892915248871,
+    "train_runtime": 164.8721,
+    "train_samples_per_second": 198.857,
+    "train_steps_per_second": 0.388
+}

checkpoint-35/config.json ADDED Viewed

	@@ -0,0 +1,169 @@

+{
+  "_name_or_path": "microsoft/dit-large",
+  "add_fpn": false,
+  "architectures": [
+    "BeitForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auxiliary_channels": 256,
+  "auxiliary_concat_input": false,
+  "auxiliary_loss_weight": 0.4,
+  "auxiliary_num_convs": 1,
+  "drop_path_rate": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "informe_social",
+    "1": "cargo_ingreso_mpe",
+    "2": "acta_registro_personal",
+    "3": "citacion_pnp",
+    "4": "acta_lectura_derechos_ley30364",
+    "5": "acta_intervencion_minpu",
+    "6": "denuncia_policial",
+    "7": "providencia_fiscal",
+    "8": "consulta_medidas_proteccion",
+    "9": "oficio_medicina_legal",
+    "10": "oficio_evaluacion_psicologica",
+    "11": "resolucion_judicial_audiencia",
+    "12": "certificado_medico_legal",
+    "13": "ficha_datos_sidpol",
+    "14": "consulta_sucamec",
+    "15": "informe_mimp",
+    "16": "ficha_valoracion_riesgo_manual",
+    "17": "oficio_atencion_integral",
+    "18": "escrito_minpu",
+    "19": "informe_psicologico",
+    "20": "ficha_valoracion_riesgo_digital",
+    "21": "informe_medico",
+    "22": "croquis_domicilio",
+    "23": "ficha_datos_reniec",
+    "24": "escrito_pj",
+    "25": "evidencia_chat",
+    "26": "declaracion_minpu",
+    "27": "notificacion_resolucion_judicial_audiencia",
+    "28": "consulta_antecedentes_penales",
+    "29": "notificacion_detencion_pnp",
+    "30": "consulta_personas_sidpol",
+    "31": "escrito_pnp",
+    "32": "constancia_buen_trato",
+    "33": "declaracion_pnp",
+    "34": "escrito_mimp",
+    "35": "acta_intervencion_pnp",
+    "36": "constancia_notificacion",
+    "37": "cargo_ingreso_sij",
+    "38": "anexo_dni",
+    "39": "hoja_blanco",
+    "40": "consulta_requisitorias"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "acta_intervencion_minpu": 5,
+    "acta_intervencion_pnp": 35,
+    "acta_lectura_derechos_ley30364": 4,
+    "acta_registro_personal": 2,
+    "anexo_dni": 38,
+    "cargo_ingreso_mpe": 1,
+    "cargo_ingreso_sij": 37,
+    "certificado_medico_legal": 12,
+    "citacion_pnp": 3,
+    "constancia_buen_trato": 32,
+    "constancia_notificacion": 36,
+    "consulta_antecedentes_penales": 28,
+    "consulta_medidas_proteccion": 8,
+    "consulta_personas_sidpol": 30,
+    "consulta_requisitorias": 40,
+    "consulta_sucamec": 14,
+    "croquis_domicilio": 22,
+    "declaracion_minpu": 26,
+    "declaracion_pnp": 33,
+    "denuncia_policial": 6,
+    "escrito_mimp": 34,
+    "escrito_minpu": 18,
+    "escrito_pj": 24,
+    "escrito_pnp": 31,
+    "evidencia_chat": 25,
+    "ficha_datos_reniec": 23,
+    "ficha_datos_sidpol": 13,
+    "ficha_valoracion_riesgo_digital": 20,
+    "ficha_valoracion_riesgo_manual": 16,
+    "hoja_blanco": 39,
+    "informe_medico": 21,
+    "informe_mimp": 15,
+    "informe_psicologico": 19,
+    "informe_social": 0,
+    "notificacion_detencion_pnp": 29,
+    "notificacion_resolucion_judicial_audiencia": 27,
+    "oficio_atencion_integral": 17,
+    "oficio_evaluacion_psicologica": 10,
+    "oficio_medicina_legal": 9,
+    "providencia_fiscal": 7,
+    "resolucion_judicial_audiencia": 11
+  },
+  "layer_norm_eps": 1e-12,
+  "layer_scale_init_value": 0.1,
+  "model_type": "beit",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "out_features": [
+    "stage3",
+    "stage5",
+    "stage7",
+    "stage11"
+  ],
+  "out_indices": [
+    3,
+    5,
+    7,
+    11
+  ],
+  "patch_size": 16,
+  "pool_scales": [
+    1,
+    2,
+    3,
+    6
+  ],
+  "problem_type": "single_label_classification",
+  "reshape_hidden_states": true,
+  "semantic_loss_ignore_index": 255,
+  "stage_names": [
+    "stem",
+    "stage1",
+    "stage2",
+    "stage3",
+    "stage4",
+    "stage5",
+    "stage6",
+    "stage7",
+    "stage8",
+    "stage9",
+    "stage10",
+    "stage11",
+    "stage12",
+    "stage13",
+    "stage14",
+    "stage15",
+    "stage16",
+    "stage17",
+    "stage18",
+    "stage19",
+    "stage20",
+    "stage21",
+    "stage22",
+    "stage23",
+    "stage24"
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.3",
+  "use_absolute_position_embeddings": true,
+  "use_auxiliary_head": true,
+  "use_mask_token": true,
+  "use_mean_pooling": true,
+  "use_relative_position_bias": false,
+  "use_shared_relative_position_bias": false,
+  "vocab_size": 8192
+}

checkpoint-35/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:842438b2a71dff9bd8051dc5cd455a2ffda7d106f391a8ddd42424b10a27ec96
+size 1213526036

checkpoint-35/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": false,
+  "do_normalize": true,
+  "do_reduce_labels": false,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "BeitImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-35/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a16312646f5c5640cf4b41e6af735a1c4608d535ed5abf8c5704b217a771e71a
+size 14244

checkpoint-35/trainer_state.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "best_metric": 0.13999451001921492,
+  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
+  "epoch": 0.5426356589147286,
+  "eval_steps": 35,
+  "global_step": 35,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.015503875968992248,
+      "grad_norm": 12.836527824401855,
+      "learning_rate": 0.0002857142857142857,
+      "loss": 5.7815,
+      "step": 1
+    },
+    {
+      "epoch": 0.5426356589147286,
+      "grad_norm": 6.243555545806885,
+      "learning_rate": 0.0010275543423681622,
+      "loss": 4.9108,
+      "step": 35
+    },
+    {
+      "epoch": 0.5426356589147286,
+      "eval_accuracy": 0.13999451001921492,
+      "eval_loss": 5.21905517578125,
+      "eval_macro_f1": 0.04648934733573823,
+      "eval_macro_precision": 0.03716366453858833,
+      "eval_macro_recall": 0.09867228278248098,
+      "eval_micro_f1": 0.13999451001921492,
+      "eval_micro_precision": 0.13999451001921492,
+      "eval_micro_recall": 0.13999451001921492,
+      "eval_runtime": 8.2153,
+      "eval_samples_per_second": 443.44,
+      "eval_steps_per_second": 3.53,
+      "eval_weighted_f1": 0.0724726180615966,
+      "eval_weighted_precision": 0.06178867753876749,
+      "eval_weighted_recall": 0.13999451001921492,
+      "step": 35
+    }
+  ],
+  "logging_steps": 35,
+  "max_steps": 64,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 35,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.909961819502674e+18,
+  "train_batch_size": 256,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-64/config.json ADDED Viewed

	@@ -0,0 +1,169 @@

+{
+  "_name_or_path": "microsoft/dit-large",
+  "add_fpn": false,
+  "architectures": [
+    "BeitForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auxiliary_channels": 256,
+  "auxiliary_concat_input": false,
+  "auxiliary_loss_weight": 0.4,
+  "auxiliary_num_convs": 1,
+  "drop_path_rate": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "informe_social",
+    "1": "cargo_ingreso_mpe",
+    "2": "acta_registro_personal",
+    "3": "citacion_pnp",
+    "4": "acta_lectura_derechos_ley30364",
+    "5": "acta_intervencion_minpu",
+    "6": "denuncia_policial",
+    "7": "providencia_fiscal",
+    "8": "consulta_medidas_proteccion",
+    "9": "oficio_medicina_legal",
+    "10": "oficio_evaluacion_psicologica",
+    "11": "resolucion_judicial_audiencia",
+    "12": "certificado_medico_legal",
+    "13": "ficha_datos_sidpol",
+    "14": "consulta_sucamec",
+    "15": "informe_mimp",
+    "16": "ficha_valoracion_riesgo_manual",
+    "17": "oficio_atencion_integral",
+    "18": "escrito_minpu",
+    "19": "informe_psicologico",
+    "20": "ficha_valoracion_riesgo_digital",
+    "21": "informe_medico",
+    "22": "croquis_domicilio",
+    "23": "ficha_datos_reniec",
+    "24": "escrito_pj",
+    "25": "evidencia_chat",
+    "26": "declaracion_minpu",
+    "27": "notificacion_resolucion_judicial_audiencia",
+    "28": "consulta_antecedentes_penales",
+    "29": "notificacion_detencion_pnp",
+    "30": "consulta_personas_sidpol",
+    "31": "escrito_pnp",
+    "32": "constancia_buen_trato",
+    "33": "declaracion_pnp",
+    "34": "escrito_mimp",
+    "35": "acta_intervencion_pnp",
+    "36": "constancia_notificacion",
+    "37": "cargo_ingreso_sij",
+    "38": "anexo_dni",
+    "39": "hoja_blanco",
+    "40": "consulta_requisitorias"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "acta_intervencion_minpu": 5,
+    "acta_intervencion_pnp": 35,
+    "acta_lectura_derechos_ley30364": 4,
+    "acta_registro_personal": 2,
+    "anexo_dni": 38,
+    "cargo_ingreso_mpe": 1,
+    "cargo_ingreso_sij": 37,
+    "certificado_medico_legal": 12,
+    "citacion_pnp": 3,
+    "constancia_buen_trato": 32,
+    "constancia_notificacion": 36,
+    "consulta_antecedentes_penales": 28,
+    "consulta_medidas_proteccion": 8,
+    "consulta_personas_sidpol": 30,
+    "consulta_requisitorias": 40,
+    "consulta_sucamec": 14,
+    "croquis_domicilio": 22,
+    "declaracion_minpu": 26,
+    "declaracion_pnp": 33,
+    "denuncia_policial": 6,
+    "escrito_mimp": 34,
+    "escrito_minpu": 18,
+    "escrito_pj": 24,
+    "escrito_pnp": 31,
+    "evidencia_chat": 25,
+    "ficha_datos_reniec": 23,
+    "ficha_datos_sidpol": 13,
+    "ficha_valoracion_riesgo_digital": 20,
+    "ficha_valoracion_riesgo_manual": 16,
+    "hoja_blanco": 39,
+    "informe_medico": 21,
+    "informe_mimp": 15,
+    "informe_psicologico": 19,
+    "informe_social": 0,
+    "notificacion_detencion_pnp": 29,
+    "notificacion_resolucion_judicial_audiencia": 27,
+    "oficio_atencion_integral": 17,
+    "oficio_evaluacion_psicologica": 10,
+    "oficio_medicina_legal": 9,
+    "providencia_fiscal": 7,
+    "resolucion_judicial_audiencia": 11
+  },
+  "layer_norm_eps": 1e-12,
+  "layer_scale_init_value": 0.1,
+  "model_type": "beit",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "out_features": [
+    "stage3",
+    "stage5",
+    "stage7",
+    "stage11"
+  ],
+  "out_indices": [
+    3,
+    5,
+    7,
+    11
+  ],
+  "patch_size": 16,
+  "pool_scales": [
+    1,
+    2,
+    3,
+    6
+  ],
+  "problem_type": "single_label_classification",
+  "reshape_hidden_states": true,
+  "semantic_loss_ignore_index": 255,
+  "stage_names": [
+    "stem",
+    "stage1",
+    "stage2",
+    "stage3",
+    "stage4",
+    "stage5",
+    "stage6",
+    "stage7",
+    "stage8",
+    "stage9",
+    "stage10",
+    "stage11",
+    "stage12",
+    "stage13",
+    "stage14",
+    "stage15",
+    "stage16",
+    "stage17",
+    "stage18",
+    "stage19",
+    "stage20",
+    "stage21",
+    "stage22",
+    "stage23",
+    "stage24"
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.3",
+  "use_absolute_position_embeddings": true,
+  "use_auxiliary_head": true,
+  "use_mask_token": true,
+  "use_mean_pooling": true,
+  "use_relative_position_bias": false,
+  "use_shared_relative_position_bias": false,
+  "vocab_size": 8192
+}

checkpoint-64/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b95a5acea4810d62c8e91883a28ffee70ed6514bb963bcbeff5fd3d711ff1f10
+size 1213526036

checkpoint-64/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": false,
+  "do_normalize": true,
+  "do_reduce_labels": false,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "BeitImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-64/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fd323ff31f4540314edfc5f0517b90e0430bc578b03d774b83916b841baca9c
+size 14244

checkpoint-64/trainer_state.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "best_metric": 0.13999451001921492,
+  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
+  "epoch": 0.9922480620155039,
+  "eval_steps": 35,
+  "global_step": 64,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.015503875968992248,
+      "grad_norm": 12.836527824401855,
+      "learning_rate": 0.0002857142857142857,
+      "loss": 5.7815,
+      "step": 1
+    },
+    {
+      "epoch": 0.5426356589147286,
+      "grad_norm": 6.243555545806885,
+      "learning_rate": 0.0010275543423681622,
+      "loss": 4.9108,
+      "step": 35
+    },
+    {
+      "epoch": 0.5426356589147286,
+      "eval_accuracy": 0.13999451001921492,
+      "eval_loss": 5.21905517578125,
+      "eval_macro_f1": 0.04648934733573823,
+      "eval_macro_precision": 0.03716366453858833,
+      "eval_macro_recall": 0.09867228278248098,
+      "eval_micro_f1": 0.13999451001921492,
+      "eval_micro_precision": 0.13999451001921492,
+      "eval_micro_recall": 0.13999451001921492,
+      "eval_runtime": 8.2153,
+      "eval_samples_per_second": 443.44,
+      "eval_steps_per_second": 3.53,
+      "eval_weighted_f1": 0.0724726180615966,
+      "eval_weighted_precision": 0.06178867753876749,
+      "eval_weighted_recall": 0.13999451001921492,
+      "step": 35
+    }
+  ],
+  "logging_steps": 35,
+  "max_steps": 64,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 35,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.978215898519175e+18,
+  "train_batch_size": 256,
+  "trial_name": null,
+  "trial_params": null
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "eval_accuracy": 0.15015097447158934,
+    "eval_loss": 4.886590003967285,
+    "eval_macro_f1": 0.033358390849091486,
+    "eval_macro_precision": 0.039403090337421884,
+    "eval_macro_recall": 0.07102345263438722,
+    "eval_micro_f1": 0.15015097447158934,
+    "eval_micro_precision": 0.15015097447158934,
+    "eval_micro_recall": 0.15015097447158934,
+    "eval_runtime": 10.4546,
+    "eval_samples_per_second": 348.46,
+    "eval_steps_per_second": 0.765,
+    "eval_weighted_f1": 0.054572202456524575,
+    "eval_weighted_precision": 0.06415751425956755,
+    "eval_weighted_recall": 0.15015097447158934
+}

metrics.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "train_runtime": 163.0123,
+  "train_samples_per_second": 201.126,
+  "train_steps_per_second": 0.393,
+  "total_flos": 8.978215898519175e+18,
+  "train_loss": 4.277425870299339,
+  "epoch": 0.9922480620155039
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d4ba2f71de80d1f45315dc0f68b08a4f8ef93be9c2b07beec6073786df357e9
 size 1213526036

 version https://git-lfs.github.com/spec/v1
+oid sha256:842438b2a71dff9bd8051dc5cd455a2ffda7d106f391a8ddd42424b10a27ec96
 size 1213526036

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 0.9922480620155039,
+    "total_flos": 8.978215898519175e+18,
+    "train_loss": 4.798892915248871,
+    "train_runtime": 164.8721,
+    "train_samples_per_second": 198.857,
+    "train_steps_per_second": 0.388
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,74 @@

+{
+  "best_metric": 0.13999451001921492,
+  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
+  "epoch": 0.9922480620155039,
+  "eval_steps": 35,
+  "global_step": 64,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.015503875968992248,
+      "grad_norm": 12.836527824401855,
+      "learning_rate": 0.0002857142857142857,
+      "loss": 5.7815,
+      "step": 1
+    },
+    {
+      "epoch": 0.5426356589147286,
+      "grad_norm": 6.243555545806885,
+      "learning_rate": 0.0010275543423681622,
+      "loss": 4.9108,
+      "step": 35
+    },
+    {
+      "epoch": 0.5426356589147286,
+      "eval_accuracy": 0.13999451001921492,
+      "eval_loss": 5.21905517578125,
+      "eval_macro_f1": 0.04648934733573823,
+      "eval_macro_precision": 0.03716366453858833,
+      "eval_macro_recall": 0.09867228278248098,
+      "eval_micro_f1": 0.13999451001921492,
+      "eval_micro_precision": 0.13999451001921492,
+      "eval_micro_recall": 0.13999451001921492,
+      "eval_runtime": 8.2153,
+      "eval_samples_per_second": 443.44,
+      "eval_steps_per_second": 3.53,
+      "eval_weighted_f1": 0.0724726180615966,
+      "eval_weighted_precision": 0.06178867753876749,
+      "eval_weighted_recall": 0.13999451001921492,
+      "step": 35
+    },
+    {
+      "epoch": 0.9922480620155039,
+      "step": 64,
+      "total_flos": 8.978215898519175e+18,
+      "train_loss": 4.277425870299339,
+      "train_runtime": 163.0123,
+      "train_samples_per_second": 201.126,
+      "train_steps_per_second": 0.393
+    }
+  ],
+  "logging_steps": 35,
+  "max_steps": 64,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 35,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.978215898519175e+18,
+  "train_batch_size": 256,
+  "trial_name": null,
+  "trial_params": null
+}

training_metrics.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "train": {
+    "train_runtime": 163.1429,
+    "train_samples_per_second": 200.965,
+    "train_steps_per_second": 0.392,
+    "total_flos": 8.978215898519175e+18,
+    "train_loss": 5.019116312265396,
+    "epoch": 0.9922480620155039
+  },
+  "eval": {
+    "eval_loss": 4.839980125427246,
+    "eval_accuracy": 0.13889651386220148,
+    "eval_weighted_f1": 0.049892274188486484,
+    "eval_micro_f1": 0.13889651386220148,
+    "eval_macro_f1": 0.027543503036345927,
+    "eval_weighted_recall": 0.13889651386220148,
+    "eval_micro_recall": 0.13889651386220148,
+    "eval_macro_recall": 0.05869405594955945,
+    "eval_weighted_precision": 0.031947616026715114,
+    "eval_micro_precision": 0.13889651386220148,
+    "eval_macro_precision": 0.018972609174899638,
+    "eval_runtime": 10.6093,
+    "eval_samples_per_second": 343.379,
+    "eval_steps_per_second": 0.754
+  }
+}