Amanaccessassist committed on
Commit
4bcb5ce
1 Parent(s): 575be78

End of training

Browse files
README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/vit-base-patch16-224-in21k
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: finetune-apple-leaf
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # finetune-apple-leaf
17
+
18
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.0682
21
+ - Accuracy: 0.9957
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 2e-05
41
+ - train_batch_size: 64
42
+ - eval_batch_size: 64
43
+ - seed: 42
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: linear
46
+ - num_epochs: 5
47
+ - mixed_precision_training: Native AMP
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
+ | 0.1988 | 1.0 | 209 | 0.1881 | 0.9957 |
54
+ | 0.1012 | 2.0 | 418 | 0.1110 | 0.9953 |
55
+ | 0.1171 | 3.0 | 627 | 0.0925 | 0.9928 |
56
+ | 0.0766 | 4.0 | 836 | 0.0707 | 0.9966 |
57
+ | 0.0723 | 5.0 | 1045 | 0.0682 | 0.9957 |
58
+
59
+
60
+ ### Framework versions
61
+
62
+ - Transformers 4.41.1
63
+ - Pytorch 2.1.2
64
+ - Datasets 2.19.1
65
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9957482993197279,
4
+ "eval_loss": 0.0682438537478447,
5
+ "eval_runtime": 40.0762,
6
+ "eval_samples_per_second": 58.688,
7
+ "eval_steps_per_second": 0.923,
8
+ "total_flos": 5.1638175692258e+18,
9
+ "train_loss": 0.20529093819371821,
10
+ "train_runtime": 2500.4647,
11
+ "train_samples_per_second": 26.649,
12
+ "train_steps_per_second": 0.418
13
+ }
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Apple Rust",
13
+ "1": "Apple Scab",
14
+ "2": "Black Rot",
15
+ "3": "Healthy",
16
 + "4": "Unknown"
 17
 + },
 18
 + "image_size": 224,
 19
 + "initializer_range": 0.02,
 20
 + "intermediate_size": 3072,
 21
 + "label2id": {
 22
 + "Apple Rust": 0,
 23
 + "Apple Scab": 1,
 24
 + "Black Rot": 2,
 25
 + "Healthy": 3,
 26
 + "Unknown": 4
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.41.1"
38
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9957482993197279,
4
+ "eval_loss": 0.0682438537478447,
5
+ "eval_runtime": 40.0762,
6
+ "eval_samples_per_second": 58.688,
7
+ "eval_steps_per_second": 0.923
8
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1a2ce5434427e2be0a6ed1a16cec05fb055bf9718452c547c4554e4dd98a923
3
+ size 343233204
preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.5,
21
+ 0.5,
22
+ 0.5
23
+ ],
24
+ "image_processor_type": "ViTFeatureExtractor",
25
+ "image_std": [
26
+ 0.5,
27
+ 0.5,
28
+ 0.5
29
+ ],
30
+ "resample": 2,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
runs/Jun02_11-14-26_4d71aa98b00b/events.out.tfevents.1717326867.4d71aa98b00b.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ad77aac6ce72735bf044205bca45985c9551983091312aba1a18fe47a86477
3
+ size 28815
runs/Jun02_11-14-26_4d71aa98b00b/events.out.tfevents.1717329814.4d71aa98b00b.34.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf62c044a6ad9ffe88063353f89bcb9d21ecb4e3481e09d6b53f0b07e0f35b8c
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "total_flos": 5.1638175692258e+18,
4
+ "train_loss": 0.20529093819371821,
5
+ "train_runtime": 2500.4647,
6
+ "train_samples_per_second": 26.649,
7
+ "train_steps_per_second": 0.418
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.0682438537478447,
3
+ "best_model_checkpoint": "finetune-apple-leaf/checkpoint-1045",
4
+ "epoch": 5.0,
5
+ "eval_steps": 100,
6
+ "global_step": 1045,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04784688995215311,
13
+ "grad_norm": 68452.109375,
14
+ "learning_rate": 1.980861244019139e-05,
15
+ "loss": 1.5577,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.09569377990430622,
20
+ "grad_norm": 81296.0546875,
21
+ "learning_rate": 1.9617224880382777e-05,
22
+ "loss": 1.4263,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.14354066985645933,
27
+ "grad_norm": 91526.734375,
28
+ "learning_rate": 1.9425837320574165e-05,
29
+ "loss": 1.2775,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.19138755980861244,
34
+ "grad_norm": 82238.640625,
35
+ "learning_rate": 1.9234449760765553e-05,
36
+ "loss": 1.144,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.23923444976076555,
41
+ "grad_norm": 93808.3671875,
42
+ "learning_rate": 1.904306220095694e-05,
43
+ "loss": 0.9826,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.28708133971291866,
48
+ "grad_norm": 92552.0625,
49
+ "learning_rate": 1.8851674641148328e-05,
50
+ "loss": 0.834,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.3349282296650718,
55
+ "grad_norm": 97152.6953125,
56
+ "learning_rate": 1.8660287081339713e-05,
57
+ "loss": 0.7036,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.3827751196172249,
62
+ "grad_norm": 72807.1796875,
63
+ "learning_rate": 1.8468899521531104e-05,
64
+ "loss": 0.5867,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.430622009569378,
69
+ "grad_norm": 80641.046875,
70
+ "learning_rate": 1.8277511961722488e-05,
71
+ "loss": 0.5116,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.4784688995215311,
76
+ "grad_norm": 88719.7265625,
77
+ "learning_rate": 1.8086124401913876e-05,
78
+ "loss": 0.4345,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.5263157894736842,
83
+ "grad_norm": 87248.7109375,
84
+ "learning_rate": 1.7894736842105264e-05,
85
+ "loss": 0.3765,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.5741626794258373,
90
+ "grad_norm": 74184.46875,
91
+ "learning_rate": 1.770334928229665e-05,
92
+ "loss": 0.3363,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.6220095693779905,
97
+ "grad_norm": 76457.8125,
98
+ "learning_rate": 1.751196172248804e-05,
99
+ "loss": 0.3341,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.6698564593301436,
104
+ "grad_norm": 59443.3984375,
105
+ "learning_rate": 1.7320574162679427e-05,
106
+ "loss": 0.2943,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.7177033492822966,
111
+ "grad_norm": 98078.953125,
112
+ "learning_rate": 1.7129186602870815e-05,
113
+ "loss": 0.2792,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.7655502392344498,
118
+ "grad_norm": 68582.828125,
119
+ "learning_rate": 1.6937799043062203e-05,
120
+ "loss": 0.2567,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.8133971291866029,
125
+ "grad_norm": 60306.1328125,
126
+ "learning_rate": 1.674641148325359e-05,
127
+ "loss": 0.2431,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.861244019138756,
132
+ "grad_norm": 72700.328125,
133
+ "learning_rate": 1.6555023923444978e-05,
134
+ "loss": 0.2115,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.9090909090909091,
139
+ "grad_norm": 152611.125,
140
+ "learning_rate": 1.6363636363636366e-05,
141
+ "loss": 0.222,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.9569377990430622,
146
+ "grad_norm": 61888.64453125,
147
+ "learning_rate": 1.6172248803827754e-05,
148
+ "loss": 0.1988,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 1.0,
153
+ "eval_accuracy": 0.9957482993197279,
154
+ "eval_loss": 0.18814513087272644,
155
+ "eval_runtime": 39.0452,
156
+ "eval_samples_per_second": 60.238,
157
+ "eval_steps_per_second": 0.948,
158
+ "step": 209
159
+ },
160
+ {
161
+ "epoch": 1.0047846889952152,
162
+ "grad_norm": 93927.0234375,
163
+ "learning_rate": 1.5980861244019138e-05,
164
+ "loss": 0.215,
165
+ "step": 210
166
+ },
167
+ {
168
+ "epoch": 1.0526315789473684,
169
+ "grad_norm": 50695.28125,
170
+ "learning_rate": 1.578947368421053e-05,
171
+ "loss": 0.1936,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 1.1004784688995215,
176
+ "grad_norm": 150133.75,
177
+ "learning_rate": 1.5598086124401914e-05,
178
+ "loss": 0.1818,
179
+ "step": 230
180
+ },
181
+ {
182
+ "epoch": 1.1483253588516746,
183
+ "grad_norm": 91581.5078125,
184
+ "learning_rate": 1.5406698564593305e-05,
185
+ "loss": 0.1819,
186
+ "step": 240
187
+ },
188
+ {
189
+ "epoch": 1.1961722488038278,
190
+ "grad_norm": 53073.0234375,
191
+ "learning_rate": 1.5215311004784689e-05,
192
+ "loss": 0.1618,
193
+ "step": 250
194
+ },
195
+ {
196
+ "epoch": 1.244019138755981,
197
+ "grad_norm": 112323.625,
198
+ "learning_rate": 1.5023923444976079e-05,
199
+ "loss": 0.165,
200
+ "step": 260
201
+ },
202
+ {
203
+ "epoch": 1.291866028708134,
204
+ "grad_norm": 97414.734375,
205
+ "learning_rate": 1.4832535885167465e-05,
206
+ "loss": 0.2017,
207
+ "step": 270
208
+ },
209
+ {
210
+ "epoch": 1.339712918660287,
211
+ "grad_norm": 162907.59375,
212
+ "learning_rate": 1.4641148325358854e-05,
213
+ "loss": 0.1762,
214
+ "step": 280
215
+ },
216
+ {
217
+ "epoch": 1.38755980861244,
218
+ "grad_norm": 27552.251953125,
219
+ "learning_rate": 1.444976076555024e-05,
220
+ "loss": 0.1715,
221
+ "step": 290
222
+ },
223
+ {
224
+ "epoch": 1.4354066985645932,
225
+ "grad_norm": 108859.3671875,
226
+ "learning_rate": 1.4258373205741626e-05,
227
+ "loss": 0.153,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 1.4832535885167464,
232
+ "grad_norm": 73719.96875,
233
+ "learning_rate": 1.4066985645933016e-05,
234
+ "loss": 0.14,
235
+ "step": 310
236
+ },
237
+ {
238
+ "epoch": 1.5311004784688995,
239
+ "grad_norm": 21314.03125,
240
+ "learning_rate": 1.3875598086124402e-05,
241
+ "loss": 0.1354,
242
+ "step": 320
243
+ },
244
+ {
245
+ "epoch": 1.5789473684210527,
246
+ "grad_norm": 58937.32421875,
247
+ "learning_rate": 1.3684210526315791e-05,
248
+ "loss": 0.1389,
249
+ "step": 330
250
+ },
251
+ {
252
+ "epoch": 1.6267942583732058,
253
+ "grad_norm": 56629.7578125,
254
+ "learning_rate": 1.3492822966507177e-05,
255
+ "loss": 0.1402,
256
+ "step": 340
257
+ },
258
+ {
259
+ "epoch": 1.674641148325359,
260
+ "grad_norm": 59540.6953125,
261
+ "learning_rate": 1.3301435406698567e-05,
262
+ "loss": 0.1347,
263
+ "step": 350
264
+ },
265
+ {
266
+ "epoch": 1.722488038277512,
267
+ "grad_norm": 81783.2421875,
268
+ "learning_rate": 1.3110047846889953e-05,
269
+ "loss": 0.1448,
270
+ "step": 360
271
+ },
272
+ {
273
+ "epoch": 1.7703349282296652,
274
+ "grad_norm": 90950.421875,
275
+ "learning_rate": 1.2918660287081342e-05,
276
+ "loss": 0.1503,
277
+ "step": 370
278
+ },
279
+ {
280
+ "epoch": 1.8181818181818183,
281
+ "grad_norm": 149309.421875,
282
+ "learning_rate": 1.2727272727272728e-05,
283
+ "loss": 0.1464,
284
+ "step": 380
285
+ },
286
+ {
287
+ "epoch": 1.8660287081339713,
288
+ "grad_norm": 83194.9375,
289
+ "learning_rate": 1.2535885167464116e-05,
290
+ "loss": 0.1394,
291
+ "step": 390
292
+ },
293
+ {
294
+ "epoch": 1.9138755980861244,
295
+ "grad_norm": 124922.578125,
296
+ "learning_rate": 1.2344497607655504e-05,
297
+ "loss": 0.1245,
298
+ "step": 400
299
+ },
300
+ {
301
+ "epoch": 1.9617224880382775,
302
+ "grad_norm": 52044.2578125,
303
+ "learning_rate": 1.215311004784689e-05,
304
+ "loss": 0.1012,
305
+ "step": 410
306
+ },
307
+ {
308
+ "epoch": 2.0,
309
+ "eval_accuracy": 0.9953231292517006,
310
+ "eval_loss": 0.1110498234629631,
311
+ "eval_runtime": 39.4692,
312
+ "eval_samples_per_second": 59.591,
313
+ "eval_steps_per_second": 0.937,
314
+ "step": 418
315
+ },
316
+ {
317
+ "epoch": 2.0095693779904304,
318
+ "grad_norm": 78857.1328125,
319
+ "learning_rate": 1.196172248803828e-05,
320
+ "loss": 0.1258,
321
+ "step": 420
322
+ },
323
+ {
324
+ "epoch": 2.0574162679425836,
325
+ "grad_norm": 74979.1796875,
326
+ "learning_rate": 1.1770334928229666e-05,
327
+ "loss": 0.1325,
328
+ "step": 430
329
+ },
330
+ {
331
+ "epoch": 2.1052631578947367,
332
+ "grad_norm": 85820.78125,
333
+ "learning_rate": 1.1578947368421053e-05,
334
+ "loss": 0.1231,
335
+ "step": 440
336
+ },
337
+ {
338
+ "epoch": 2.15311004784689,
339
+ "grad_norm": 58782.41015625,
340
+ "learning_rate": 1.1387559808612441e-05,
341
+ "loss": 0.1282,
342
+ "step": 450
343
+ },
344
+ {
345
+ "epoch": 2.200956937799043,
346
+ "grad_norm": 21210.45703125,
347
+ "learning_rate": 1.1196172248803829e-05,
348
+ "loss": 0.1087,
349
+ "step": 460
350
+ },
351
+ {
352
+ "epoch": 2.248803827751196,
353
+ "grad_norm": 80271.7265625,
354
+ "learning_rate": 1.1004784688995217e-05,
355
+ "loss": 0.1192,
356
+ "step": 470
357
+ },
358
+ {
359
+ "epoch": 2.2966507177033493,
360
+ "grad_norm": 32322.310546875,
361
+ "learning_rate": 1.0813397129186604e-05,
362
+ "loss": 0.1098,
363
+ "step": 480
364
+ },
365
+ {
366
+ "epoch": 2.3444976076555024,
367
+ "grad_norm": 113407.7578125,
368
+ "learning_rate": 1.062200956937799e-05,
369
+ "loss": 0.1216,
370
+ "step": 490
371
+ },
372
+ {
373
+ "epoch": 2.3923444976076556,
374
+ "grad_norm": 194755.9375,
375
+ "learning_rate": 1.0430622009569378e-05,
376
+ "loss": 0.116,
377
+ "step": 500
378
+ },
379
+ {
380
+ "epoch": 2.4401913875598087,
381
+ "grad_norm": 23810.296875,
382
+ "learning_rate": 1.0239234449760766e-05,
383
+ "loss": 0.1026,
384
+ "step": 510
385
+ },
386
+ {
387
+ "epoch": 2.488038277511962,
388
+ "grad_norm": 49451.3125,
389
+ "learning_rate": 1.0047846889952154e-05,
390
+ "loss": 0.1217,
391
+ "step": 520
392
+ },
393
+ {
394
+ "epoch": 2.535885167464115,
395
+ "grad_norm": 85535.9609375,
396
+ "learning_rate": 9.856459330143542e-06,
397
+ "loss": 0.1324,
398
+ "step": 530
399
+ },
400
+ {
401
+ "epoch": 2.583732057416268,
402
+ "grad_norm": 77596.9453125,
403
+ "learning_rate": 9.66507177033493e-06,
404
+ "loss": 0.1154,
405
+ "step": 540
406
+ },
407
+ {
408
+ "epoch": 2.6315789473684212,
409
+ "grad_norm": 196213.265625,
410
+ "learning_rate": 9.473684210526315e-06,
411
+ "loss": 0.1234,
412
+ "step": 550
413
+ },
414
+ {
415
+ "epoch": 2.679425837320574,
416
+ "grad_norm": 129738.171875,
417
+ "learning_rate": 9.282296650717703e-06,
418
+ "loss": 0.0983,
419
+ "step": 560
420
+ },
421
+ {
422
+ "epoch": 2.7272727272727275,
423
+ "grad_norm": 123404.078125,
424
+ "learning_rate": 9.090909090909091e-06,
425
+ "loss": 0.1187,
426
+ "step": 570
427
+ },
428
+ {
429
+ "epoch": 2.77511961722488,
430
+ "grad_norm": 163260.46875,
431
+ "learning_rate": 8.899521531100479e-06,
432
+ "loss": 0.1145,
433
+ "step": 580
434
+ },
435
+ {
436
+ "epoch": 2.8229665071770333,
437
+ "grad_norm": 92286.703125,
438
+ "learning_rate": 8.708133971291867e-06,
439
+ "loss": 0.1001,
440
+ "step": 590
441
+ },
442
+ {
443
+ "epoch": 2.8708133971291865,
444
+ "grad_norm": 107133.9375,
445
+ "learning_rate": 8.516746411483254e-06,
446
+ "loss": 0.0953,
447
+ "step": 600
448
+ },
449
+ {
450
+ "epoch": 2.9186602870813396,
451
+ "grad_norm": 52350.8359375,
452
+ "learning_rate": 8.325358851674642e-06,
453
+ "loss": 0.0981,
454
+ "step": 610
455
+ },
456
+ {
457
+ "epoch": 2.9665071770334928,
458
+ "grad_norm": 33242.46875,
459
+ "learning_rate": 8.13397129186603e-06,
460
+ "loss": 0.1171,
461
+ "step": 620
462
+ },
463
+ {
464
+ "epoch": 3.0,
465
+ "eval_accuracy": 0.9927721088435374,
466
+ "eval_loss": 0.0925019159913063,
467
+ "eval_runtime": 39.5447,
468
+ "eval_samples_per_second": 59.477,
469
+ "eval_steps_per_second": 0.936,
470
+ "step": 627
471
+ },
472
+ {
473
+ "epoch": 3.014354066985646,
474
+ "grad_norm": 24220.681640625,
475
+ "learning_rate": 7.942583732057418e-06,
476
+ "loss": 0.0879,
477
+ "step": 630
478
+ },
479
+ {
480
+ "epoch": 3.062200956937799,
481
+ "grad_norm": 161409.765625,
482
+ "learning_rate": 7.751196172248805e-06,
483
+ "loss": 0.098,
484
+ "step": 640
485
+ },
486
+ {
487
+ "epoch": 3.110047846889952,
488
+ "grad_norm": 137376.515625,
489
+ "learning_rate": 7.5598086124401915e-06,
490
+ "loss": 0.0991,
491
+ "step": 650
492
+ },
493
+ {
494
+ "epoch": 3.1578947368421053,
495
+ "grad_norm": 17668.015625,
496
+ "learning_rate": 7.368421052631579e-06,
497
+ "loss": 0.0898,
498
+ "step": 660
499
+ },
500
+ {
501
+ "epoch": 3.2057416267942584,
502
+ "grad_norm": 168850.53125,
503
+ "learning_rate": 7.177033492822967e-06,
504
+ "loss": 0.0787,
505
+ "step": 670
506
+ },
507
+ {
508
+ "epoch": 3.2535885167464116,
509
+ "grad_norm": 24098.3203125,
510
+ "learning_rate": 6.985645933014355e-06,
511
+ "loss": 0.1136,
512
+ "step": 680
513
+ },
514
+ {
515
+ "epoch": 3.3014354066985647,
516
+ "grad_norm": 184889.9375,
517
+ "learning_rate": 6.794258373205742e-06,
518
+ "loss": 0.0899,
519
+ "step": 690
520
+ },
521
+ {
522
+ "epoch": 3.349282296650718,
523
+ "grad_norm": 154068.140625,
524
+ "learning_rate": 6.6028708133971295e-06,
525
+ "loss": 0.072,
526
+ "step": 700
527
+ },
528
+ {
529
+ "epoch": 3.397129186602871,
530
+ "grad_norm": 213492.796875,
531
+ "learning_rate": 6.411483253588517e-06,
532
+ "loss": 0.095,
533
+ "step": 710
534
+ },
535
+ {
536
+ "epoch": 3.444976076555024,
537
+ "grad_norm": 79102.0859375,
538
+ "learning_rate": 6.220095693779905e-06,
539
+ "loss": 0.0852,
540
+ "step": 720
541
+ },
542
+ {
543
+ "epoch": 3.492822966507177,
544
+ "grad_norm": 158182.59375,
545
+ "learning_rate": 6.028708133971293e-06,
546
+ "loss": 0.079,
547
+ "step": 730
548
+ },
549
+ {
550
+ "epoch": 3.5406698564593304,
551
+ "grad_norm": 12833.8759765625,
552
+ "learning_rate": 5.837320574162681e-06,
553
+ "loss": 0.0881,
554
+ "step": 740
555
+ },
556
+ {
557
+ "epoch": 3.588516746411483,
558
+ "grad_norm": 124951.265625,
559
+ "learning_rate": 5.645933014354067e-06,
560
+ "loss": 0.0963,
561
+ "step": 750
562
+ },
563
+ {
564
+ "epoch": 3.6363636363636362,
565
+ "grad_norm": 207838.703125,
566
+ "learning_rate": 5.4545454545454545e-06,
567
+ "loss": 0.0967,
568
+ "step": 760
569
+ },
570
+ {
571
+ "epoch": 3.6842105263157894,
572
+ "grad_norm": 15068.2353515625,
573
+ "learning_rate": 5.263157894736842e-06,
574
+ "loss": 0.081,
575
+ "step": 770
576
+ },
577
+ {
578
+ "epoch": 3.7320574162679425,
579
+ "grad_norm": 20366.380859375,
580
+ "learning_rate": 5.07177033492823e-06,
581
+ "loss": 0.0824,
582
+ "step": 780
583
+ },
584
+ {
585
+ "epoch": 3.7799043062200957,
586
+ "grad_norm": 11184.1630859375,
587
+ "learning_rate": 4.880382775119618e-06,
588
+ "loss": 0.1021,
589
+ "step": 790
590
+ },
591
+ {
592
+ "epoch": 3.827751196172249,
593
+ "grad_norm": 41858.83203125,
594
+ "learning_rate": 4.6889952153110055e-06,
595
+ "loss": 0.071,
596
+ "step": 800
597
+ },
598
+ {
599
+ "epoch": 3.875598086124402,
600
+ "grad_norm": 11330.4501953125,
601
+ "learning_rate": 4.4976076555023925e-06,
602
+ "loss": 0.0725,
603
+ "step": 810
604
+ },
605
+ {
606
+ "epoch": 3.923444976076555,
607
+ "grad_norm": 143718.625,
608
+ "learning_rate": 4.30622009569378e-06,
609
+ "loss": 0.0938,
610
+ "step": 820
611
+ },
612
+ {
613
+ "epoch": 3.971291866028708,
614
+ "grad_norm": 89987.9453125,
615
+ "learning_rate": 4.114832535885168e-06,
616
+ "loss": 0.0766,
617
+ "step": 830
618
+ },
619
+ {
620
+ "epoch": 4.0,
621
+ "eval_accuracy": 0.9965986394557823,
622
+ "eval_loss": 0.07070581614971161,
623
+ "eval_runtime": 39.4603,
624
+ "eval_samples_per_second": 59.604,
625
+ "eval_steps_per_second": 0.938,
626
+ "step": 836
627
+ },
628
+ {
629
+ "epoch": 4.019138755980861,
630
+ "grad_norm": 57436.94140625,
631
+ "learning_rate": 3.923444976076555e-06,
632
+ "loss": 0.0945,
633
+ "step": 840
634
+ },
635
+ {
636
+ "epoch": 4.0669856459330145,
637
+ "grad_norm": 40826.1953125,
638
+ "learning_rate": 3.732057416267943e-06,
639
+ "loss": 0.0743,
640
+ "step": 850
641
+ },
642
+ {
643
+ "epoch": 4.114832535885167,
644
+ "grad_norm": 29963.001953125,
645
+ "learning_rate": 3.5406698564593305e-06,
646
+ "loss": 0.0962,
647
+ "step": 860
648
+ },
649
+ {
650
+ "epoch": 4.162679425837321,
651
+ "grad_norm": 32081.44921875,
652
+ "learning_rate": 3.3492822966507182e-06,
653
+ "loss": 0.0883,
654
+ "step": 870
655
+ },
656
+ {
657
+ "epoch": 4.2105263157894735,
658
+ "grad_norm": 65598.9375,
659
+ "learning_rate": 3.157894736842105e-06,
660
+ "loss": 0.0753,
661
+ "step": 880
662
+ },
663
+ {
664
+ "epoch": 4.258373205741627,
665
+ "grad_norm": 72391.234375,
666
+ "learning_rate": 2.966507177033493e-06,
667
+ "loss": 0.0764,
668
+ "step": 890
669
+ },
670
+ {
671
+ "epoch": 4.30622009569378,
672
+ "grad_norm": 82336.0625,
673
+ "learning_rate": 2.7751196172248807e-06,
674
+ "loss": 0.0594,
675
+ "step": 900
676
+ },
677
+ {
678
+ "epoch": 4.354066985645933,
679
+ "grad_norm": 126016.4609375,
680
+ "learning_rate": 2.5837320574162685e-06,
681
+ "loss": 0.0782,
682
+ "step": 910
683
+ },
684
+ {
685
+ "epoch": 4.401913875598086,
686
+ "grad_norm": 10192.6044921875,
687
+ "learning_rate": 2.392344497607656e-06,
688
+ "loss": 0.0705,
689
+ "step": 920
690
+ },
691
+ {
692
+ "epoch": 4.44976076555024,
693
+ "grad_norm": 226228.453125,
694
+ "learning_rate": 2.200956937799043e-06,
695
+ "loss": 0.0793,
696
+ "step": 930
697
+ },
698
+ {
699
+ "epoch": 4.497607655502392,
700
+ "grad_norm": 11644.4921875,
701
+ "learning_rate": 2.0095693779904305e-06,
702
+ "loss": 0.0687,
703
+ "step": 940
704
+ },
705
+ {
706
+ "epoch": 4.545454545454545,
707
+ "grad_norm": 71224.4609375,
708
+ "learning_rate": 1.8181818181818183e-06,
709
+ "loss": 0.0626,
710
+ "step": 950
711
+ },
712
+ {
713
+ "epoch": 4.5933014354066986,
714
+ "grad_norm": 15732.5244140625,
715
+ "learning_rate": 1.6267942583732059e-06,
716
+ "loss": 0.0764,
717
+ "step": 960
718
+ },
719
+ {
720
+ "epoch": 4.641148325358852,
721
+ "grad_norm": 49954.69921875,
722
+ "learning_rate": 1.4354066985645934e-06,
723
+ "loss": 0.0941,
724
+ "step": 970
725
+ },
726
+ {
727
+ "epoch": 4.688995215311005,
728
+ "grad_norm": 196565.375,
729
+ "learning_rate": 1.244019138755981e-06,
730
+ "loss": 0.0802,
731
+ "step": 980
732
+ },
733
+ {
734
+ "epoch": 4.7368421052631575,
735
+ "grad_norm": 29685.708984375,
736
+ "learning_rate": 1.0526315789473685e-06,
737
+ "loss": 0.0656,
738
+ "step": 990
739
+ },
740
+ {
741
+ "epoch": 4.784688995215311,
742
+ "grad_norm": 31311.546875,
743
+ "learning_rate": 8.612440191387561e-07,
744
+ "loss": 0.0908,
745
+ "step": 1000
746
+ },
747
+ {
748
+ "epoch": 4.832535885167464,
749
+ "grad_norm": 75467.4765625,
750
+ "learning_rate": 6.698564593301436e-07,
751
+ "loss": 0.0755,
752
+ "step": 1010
753
+ },
754
+ {
755
+ "epoch": 4.880382775119617,
756
+ "grad_norm": 51163.75390625,
757
+ "learning_rate": 4.784688995215311e-07,
758
+ "loss": 0.0836,
759
+ "step": 1020
760
+ },
761
+ {
762
+ "epoch": 4.92822966507177,
763
+ "grad_norm": 180410.21875,
764
+ "learning_rate": 2.870813397129187e-07,
765
+ "loss": 0.0815,
766
+ "step": 1030
767
+ },
768
+ {
769
+ "epoch": 4.976076555023924,
770
+ "grad_norm": 187452.96875,
771
+ "learning_rate": 9.569377990430622e-08,
772
+ "loss": 0.0723,
773
+ "step": 1040
774
+ },
775
+ {
776
+ "epoch": 5.0,
777
+ "eval_accuracy": 0.9957482993197279,
778
+ "eval_loss": 0.0682438537478447,
779
+ "eval_runtime": 38.7752,
780
+ "eval_samples_per_second": 60.657,
781
+ "eval_steps_per_second": 0.954,
782
+ "step": 1045
783
+ },
784
+ {
785
+ "epoch": 5.0,
786
+ "step": 1045,
787
+ "total_flos": 5.1638175692258e+18,
788
+ "train_loss": 0.20529093819371821,
789
+ "train_runtime": 2500.4647,
790
+ "train_samples_per_second": 26.649,
791
+ "train_steps_per_second": 0.418
792
+ }
793
+ ],
794
+ "logging_steps": 10,
795
+ "max_steps": 1045,
796
+ "num_input_tokens_seen": 0,
797
+ "num_train_epochs": 5,
798
+ "save_steps": 100,
799
+ "stateful_callbacks": {
800
+ "EarlyStoppingCallback": {
801
+ "args": {
802
+ "early_stopping_patience": 3,
803
+ "early_stopping_threshold": 0.0
804
+ },
805
+ "attributes": {
806
+ "early_stopping_patience_counter": 0
807
+ }
808
+ },
809
+ "TrainerControl": {
810
+ "args": {
811
+ "should_epoch_stop": false,
812
+ "should_evaluate": false,
813
+ "should_log": false,
814
+ "should_save": true,
815
+ "should_training_stop": true
816
+ },
817
+ "attributes": {}
818
+ }
819
+ },
820
+ "total_flos": 5.1638175692258e+18,
821
+ "train_batch_size": 64,
822
+ "trial_name": null,
823
+ "trial_params": null
824
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f7d7582e978604d3f252d76d6c8ac401fe7d5508d003dd429c9a25147cb9090
3
+ size 5112