anh-dangminh commited on
Commit
36e1704
·
verified ·
1 Parent(s): 74239a2

End of training

Browse files
README.md ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: microsoft/resnet-50
5
+ tags:
6
+ - generated_from_trainer
7
+ datasets:
8
+ - oxford102_flower_dataset
9
+ metrics:
10
+ - accuracy
11
+ model-index:
12
+ - name: resnet-50-finetuned-oxfordflowers
13
+ results:
14
+ - task:
15
+ name: Image Classification
16
+ type: image-classification
17
+ dataset:
18
+ name: oxford102_flower_dataset
19
+ type: oxford102_flower_dataset
20
+ config: default
21
+ split: validation
22
+ args: default
23
+ metrics:
24
+ - name: Accuracy
25
+ type: accuracy
26
+ value: 0.85
27
+ ---
28
+
29
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
+ should probably proofread and complete it, then remove this comment. -->
31
+
32
+ # resnet-50-finetuned-oxfordflowers
33
+
34
+ This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on the oxford102_flower_dataset dataset.
35
+ It achieves the following results on the evaluation set:
36
+ - Loss: 0.5915
37
+ - Accuracy: 0.85
38
+
39
+ ## Model description
40
+
41
+ More information needed
42
+
43
+ ## Intended uses & limitations
44
+
45
+ More information needed
46
+
47
+ ## Training and evaluation data
48
+
49
+ More information needed
50
+
51
+ ## Training procedure
52
+
53
+ ### Training hyperparameters
54
+
55
+ The following hyperparameters were used during training:
56
+ - learning_rate: 0.001
57
+ - train_batch_size: 32
58
+ - eval_batch_size: 32
59
+ - seed: 42
60
+ - optimizer: adamw_torch (betas=(0.9, 0.999), epsilon=1e-08); no additional optimizer arguments
61
+ - lr_scheduler_type: linear
62
+ - num_epochs: 20
63
+
64
+ ### Training results
65
+
66
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
+ | 4.5224 | 1.0 | 32 | 4.2939 | 0.25 |
69
+ | 2.8139 | 2.0 | 64 | 2.1128 | 0.4892 |
70
+ | 1.4505 | 3.0 | 96 | 1.2261 | 0.6843 |
71
+ | 0.5751 | 4.0 | 128 | 1.0176 | 0.7441 |
72
+ | 0.2265 | 5.0 | 160 | 0.8487 | 0.7559 |
73
+ | 0.0531 | 6.0 | 192 | 0.7609 | 0.8 |
74
+ | 0.0411 | 7.0 | 224 | 0.7191 | 0.8029 |
75
+ | 0.0351 | 8.0 | 256 | 0.6987 | 0.8078 |
76
+ | 0.0107 | 9.0 | 288 | 0.6843 | 0.8225 |
77
+ | 0.0094 | 10.0 | 320 | 0.6314 | 0.8343 |
78
+ | 0.0081 | 11.0 | 352 | 0.6320 | 0.8353 |
79
+ | 0.0053 | 12.0 | 384 | 0.6049 | 0.8353 |
80
+ | 0.0048 | 13.0 | 416 | 0.5961 | 0.8373 |
81
+ | 0.0024 | 14.0 | 448 | 0.5880 | 0.8471 |
82
+ | 0.0028 | 15.0 | 480 | 0.5927 | 0.8441 |
83
+ | 0.0023 | 16.0 | 512 | 0.5878 | 0.8520 |
84
+ | 0.0027 | 17.0 | 544 | 0.5872 | 0.8471 |
85
+ | 0.0028 | 18.0 | 576 | 0.5892 | 0.8451 |
86
+ | 0.002 | 19.0 | 608 | 0.5933 | 0.8412 |
87
+ | 0.0017 | 20.0 | 640 | 0.5915 | 0.85 |
88
+
89
+
90
+ ### Framework versions
91
+
92
+ - Transformers 4.47.1
93
+ - Pytorch 2.5.1+cu121
94
+ - Datasets 3.2.0
95
+ - Tokenizers 0.21.0
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.7801268498942917,
4
+ "eval_loss": 0.8647737503051758,
5
+ "eval_runtime": 123.7654,
6
+ "eval_samples_per_second": 49.683,
7
+ "eval_steps_per_second": 0.396,
8
+ "total_flos": 4.36977436041216e+17,
9
+ "train_loss": 0.5368185924002319,
10
+ "train_runtime": 902.6693,
11
+ "train_samples_per_second": 22.6,
12
+ "train_steps_per_second": 0.709
13
+ }
config.json ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/resnet-50",
3
+ "architectures": [
4
+ "ResNetForImageClassification"
5
+ ],
6
+ "depths": [
7
+ 3,
8
+ 4,
9
+ 6,
10
+ 3
11
+ ],
12
+ "downsample_in_bottleneck": false,
13
+ "downsample_in_first_stage": false,
14
+ "embedding_size": 64,
15
+ "hidden_act": "relu",
16
+ "hidden_sizes": [
17
+ 256,
18
+ 512,
19
+ 1024,
20
+ 2048
21
+ ],
22
+ "id2label": {
23
+ "0": "pink primrose",
24
+ "1": "hard-leaved pocket orchid",
25
+ "2": "canterbury bells",
26
+ "3": "sweet pea",
27
+ "4": "english marigold",
28
+ "5": "tiger lily",
29
+ "6": "moon orchid",
30
+ "7": "bird of paradise",
31
+ "8": "monkshood",
32
+ "9": "globe thistle",
33
+ "10": "snapdragon",
34
+ "11": "colt's foot",
35
+ "12": "king protea",
36
+ "13": "spear thistle",
37
+ "14": "yellow iris",
38
+ "15": "globe-flower",
39
+ "16": "purple coneflower",
40
+ "17": "peruvian lily",
41
+ "18": "balloon flower",
42
+ "19": "giant white arum lily",
43
+ "20": "fire lily",
44
+ "21": "pincushion flower",
45
+ "22": "fritillary",
46
+ "23": "red ginger",
47
+ "24": "grape hyacinth",
48
+ "25": "corn poppy",
49
+ "26": "prince of wales feathers",
50
+ "27": "stemless gentian",
51
+ "28": "artichoke",
52
+ "29": "sweet william",
53
+ "30": "carnation",
54
+ "31": "garden phlox",
55
+ "32": "love in the mist",
56
+ "33": "mexican aster",
57
+ "34": "alpine sea holly",
58
+ "35": "ruby-lipped cattleya",
59
+ "36": "cape flower",
60
+ "37": "great masterwort",
61
+ "38": "siam tulip",
62
+ "39": "lenten rose",
63
+ "40": "barbeton daisy",
64
+ "41": "daffodil",
65
+ "42": "sword lily",
66
+ "43": "poinsettia",
67
+ "44": "bolero deep blue",
68
+ "45": "wallflower",
69
+ "46": "marigold",
70
+ "47": "buttercup",
71
+ "48": "oxeye daisy",
72
+ "49": "common dandelion",
73
+ "50": "petunia",
74
+ "51": "wild pansy",
75
+ "52": "primula",
76
+ "53": "sunflower",
77
+ "54": "pelargonium",
78
+ "55": "bishop of llandaff",
79
+ "56": "gaura",
80
+ "57": "geranium",
81
+ "58": "orange dahlia",
82
+ "59": "pink-yellow dahlia?",
83
+ "60": "cautleya spicata",
84
+ "61": "japanese anemone",
85
+ "62": "black-eyed susan",
86
+ "63": "silverbush",
87
+ "64": "californian poppy",
88
+ "65": "osteospermum",
89
+ "66": "spring crocus",
90
+ "67": "bearded iris",
91
+ "68": "windflower",
92
+ "69": "tree poppy",
93
+ "70": "gazania",
94
+ "71": "azalea",
95
+ "72": "water lily",
96
+ "73": "rose",
97
+ "74": "thorn apple",
98
+ "75": "morning glory",
99
+ "76": "passion flower",
100
+ "77": "lotus",
101
+ "78": "toad lily",
102
+ "79": "anthurium",
103
+ "80": "frangipani",
104
+ "81": "clematis",
105
+ "82": "hibiscus",
106
+ "83": "columbine",
107
+ "84": "desert-rose",
108
+ "85": "tree mallow",
109
+ "86": "magnolia",
110
+ "87": "cyclamen",
111
+ "88": "watercress",
112
+ "89": "canna lily",
113
+ "90": "hippeastrum",
114
+ "91": "bee balm",
115
+ "92": "ball moss",
116
+ "93": "foxglove",
117
+ "94": "bougainvillea",
118
+ "95": "camellia",
119
+ "96": "mallow",
120
+ "97": "mexican petunia",
121
+ "98": "bromelia",
122
+ "99": "blanket flower",
123
+ "100": "trumpet creeper",
124
+ "101": "blackberry lily"
125
+ },
126
+ "label2id": {
127
+ "alpine sea holly": 34,
128
+ "anthurium": 79,
129
+ "artichoke": 28,
130
+ "azalea": 71,
131
+ "ball moss": 92,
132
+ "balloon flower": 18,
133
+ "barbeton daisy": 40,
134
+ "bearded iris": 67,
135
+ "bee balm": 91,
136
+ "bird of paradise": 7,
137
+ "bishop of llandaff": 55,
138
+ "black-eyed susan": 62,
139
+ "blackberry lily": 101,
140
+ "blanket flower": 99,
141
+ "bolero deep blue": 44,
142
+ "bougainvillea": 94,
143
+ "bromelia": 98,
144
+ "buttercup": 47,
145
+ "californian poppy": 64,
146
+ "camellia": 95,
147
+ "canna lily": 89,
148
+ "canterbury bells": 2,
149
+ "cape flower": 36,
150
+ "carnation": 30,
151
+ "cautleya spicata": 60,
152
+ "clematis": 81,
153
+ "colt's foot": 11,
154
+ "columbine": 83,
155
+ "common dandelion": 49,
156
+ "corn poppy": 25,
157
+ "cyclamen": 87,
158
+ "daffodil": 41,
159
+ "desert-rose": 84,
160
+ "english marigold": 4,
161
+ "fire lily": 20,
162
+ "foxglove": 93,
163
+ "frangipani": 80,
164
+ "fritillary": 22,
165
+ "garden phlox": 31,
166
+ "gaura": 56,
167
+ "gazania": 70,
168
+ "geranium": 57,
169
+ "giant white arum lily": 19,
170
+ "globe thistle": 9,
171
+ "globe-flower": 15,
172
+ "grape hyacinth": 24,
173
+ "great masterwort": 37,
174
+ "hard-leaved pocket orchid": 1,
175
+ "hibiscus": 82,
176
+ "hippeastrum": 90,
177
+ "japanese anemone": 61,
178
+ "king protea": 12,
179
+ "lenten rose": 39,
180
+ "lotus": 77,
181
+ "love in the mist": 32,
182
+ "magnolia": 86,
183
+ "mallow": 96,
184
+ "marigold": 46,
185
+ "mexican aster": 33,
186
+ "mexican petunia": 97,
187
+ "monkshood": 8,
188
+ "moon orchid": 6,
189
+ "morning glory": 75,
190
+ "orange dahlia": 58,
191
+ "osteospermum": 65,
192
+ "oxeye daisy": 48,
193
+ "passion flower": 76,
194
+ "pelargonium": 54,
195
+ "peruvian lily": 17,
196
+ "petunia": 50,
197
+ "pincushion flower": 21,
198
+ "pink primrose": 0,
199
+ "pink-yellow dahlia?": 59,
200
+ "poinsettia": 43,
201
+ "primula": 52,
202
+ "prince of wales feathers": 26,
203
+ "purple coneflower": 16,
204
+ "red ginger": 23,
205
+ "rose": 73,
206
+ "ruby-lipped cattleya": 35,
207
+ "siam tulip": 38,
208
+ "silverbush": 63,
209
+ "snapdragon": 10,
210
+ "spear thistle": 13,
211
+ "spring crocus": 66,
212
+ "stemless gentian": 27,
213
+ "sunflower": 53,
214
+ "sweet pea": 3,
215
+ "sweet william": 29,
216
+ "sword lily": 42,
217
+ "thorn apple": 74,
218
+ "tiger lily": 5,
219
+ "toad lily": 78,
220
+ "tree mallow": 85,
221
+ "tree poppy": 69,
222
+ "trumpet creeper": 100,
223
+ "wallflower": 45,
224
+ "water lily": 72,
225
+ "watercress": 88,
226
+ "wild pansy": 51,
227
+ "windflower": 68,
228
+ "yellow iris": 14
229
+ },
230
+ "layer_type": "bottleneck",
231
+ "model_type": "resnet",
232
+ "num_channels": 3,
233
+ "out_features": [
234
+ "stage4"
235
+ ],
236
+ "out_indices": [
237
+ 4
238
+ ],
239
+ "problem_type": "single_label_classification",
240
+ "stage_names": [
241
+ "stem",
242
+ "stage1",
243
+ "stage2",
244
+ "stage3",
245
+ "stage4"
246
+ ],
247
+ "torch_dtype": "float32",
248
+ "transformers_version": "4.47.1"
249
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.8205882352941176,
4
+ "eval_loss": 0.7496953010559082,
5
+ "eval_runtime": 16.2045,
6
+ "eval_samples_per_second": 62.945,
7
+ "eval_steps_per_second": 0.494
8
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f6515139de92486682b8ab4b26eaa75f616f163f3c2214015877cb8aa5c5dc
3
+ size 95122680
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_pct": 0.875,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "ConvNextImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "resample": 3,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "shortest_edge": 224
21
+ }
22
+ }
runs/Dec21_13-14-05_3efe3f73ff0a/events.out.tfevents.1734787160.3efe3f73ff0a.417.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eed13b0b6740ca2aafd3c55fc15f80f69d064551d0ba61bc5c4fe86fa2fb9076
3
+ size 12593
runs/Dec21_13-24-50_3efe3f73ff0a/events.out.tfevents.1734787495.3efe3f73ff0a.3963.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b741d50eb6d5124cc34fda60cc47faab201680eacc036bf86e3e658fe0805fa6
3
+ size 22339
runs/Dec21_13-24-50_3efe3f73ff0a/events.out.tfevents.1734789288.3efe3f73ff0a.3963.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf552733c6dbfaa37c56e75633a279652fee24cee5ffc5a6293b620daddefd27
3
+ size 10182
runs/Dec21_13-24-50_3efe3f73ff0a/events.out.tfevents.1734789352.3efe3f73ff0a.3963.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:103e534027fa47e89b8f8988bcf7940a7d2feb0e8d07747ce76697c86e966ee0
3
+ size 10499
runs/Dec21_13-24-50_3efe3f73ff0a/events.out.tfevents.1734789453.3efe3f73ff0a.3963.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:817cc2fb68a4a02bdc9ae2fb3dbaf2f701e7895599b9166fa72162195cca76fa
3
+ size 14493
runs/Dec21_14-02-33_3efe3f73ff0a/events.out.tfevents.1734789763.3efe3f73ff0a.3963.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cff27986bd235dd32b2e32db30cd85b8280a934308adddf5bc31b5309a8ccc22
3
+ size 24806
runs/Dec21_14-27-10_3efe3f73ff0a/events.out.tfevents.1734791239.3efe3f73ff0a.3963.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:741849c9a7960bd00c9ef7b053acf8f82f53151c23efab3eadf07644e0278e8f
3
+ size 10190
runs/Dec21_14-27-10_3efe3f73ff0a/events.out.tfevents.1734791271.3efe3f73ff0a.3963.10 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93f6d2e497ce7bbdd00bfd4f1a4162272402541eaab5fa1782ad97bfa5afe1b2
3
+ size 10190
runs/Dec21_14-27-10_3efe3f73ff0a/events.out.tfevents.1734791296.3efe3f73ff0a.3963.11 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9c276c4c60478d4553671a521d4b4d6e7ebfbf491a1cf4f6ec317fa96894516
3
+ size 10190
runs/Dec21_14-35-24_3efe3f73ff0a/events.out.tfevents.1734791742.3efe3f73ff0a.21272.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcac6354c9b53e02123c238a5d917abd048fc850df809a0598b153dda3d2b0f5
3
+ size 10496
runs/Dec21_14-35-24_3efe3f73ff0a/events.out.tfevents.1734791834.3efe3f73ff0a.21272.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ce7408d045a0d0446afbcc1c0a8184f5725d2bad105d5801508c6a2283dfb8
3
+ size 11139
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.7801268498942917,
4
+ "eval_loss": 0.8647737503051758,
5
+ "eval_runtime": 123.7654,
6
+ "eval_samples_per_second": 49.683,
7
+ "eval_steps_per_second": 0.396
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "total_flos": 4.36977436041216e+17,
4
+ "train_loss": 0.5368185924002319,
5
+ "train_runtime": 902.6693,
6
+ "train_samples_per_second": 22.6,
7
+ "train_steps_per_second": 0.709
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8519607843137255,
3
+ "best_model_checkpoint": "resnet-50-finetuned-oxfordflowers/checkpoint-512",
4
+ "epoch": 20.0,
5
+ "eval_steps": 500,
6
+ "global_step": 640,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.15625,
13
+ "grad_norm": 1.6800851821899414,
14
+ "learning_rate": 0.0009921875,
15
+ "loss": 4.6507,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.3125,
20
+ "grad_norm": 1.4043323993682861,
21
+ "learning_rate": 0.000984375,
22
+ "loss": 4.6146,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.46875,
27
+ "grad_norm": 1.4668281078338623,
28
+ "learning_rate": 0.0009765625,
29
+ "loss": 4.6484,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.625,
34
+ "grad_norm": 1.4262796640396118,
35
+ "learning_rate": 0.00096875,
36
+ "loss": 4.6292,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.78125,
41
+ "grad_norm": 1.1266566514968872,
42
+ "learning_rate": 0.0009609375,
43
+ "loss": 4.5702,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.9375,
48
+ "grad_norm": 1.0406345129013062,
49
+ "learning_rate": 0.000953125,
50
+ "loss": 4.5224,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 1.0,
55
+ "eval_accuracy": 0.25,
56
+ "eval_loss": 4.293937683105469,
57
+ "eval_runtime": 15.9684,
58
+ "eval_samples_per_second": 63.876,
59
+ "eval_steps_per_second": 2.004,
60
+ "step": 32
61
+ },
62
+ {
63
+ "epoch": 1.09375,
64
+ "grad_norm": 2.009798765182495,
65
+ "learning_rate": 0.0009453125,
66
+ "loss": 4.2571,
67
+ "step": 35
68
+ },
69
+ {
70
+ "epoch": 1.25,
71
+ "grad_norm": 1.697478175163269,
72
+ "learning_rate": 0.0009375,
73
+ "loss": 3.9421,
74
+ "step": 40
75
+ },
76
+ {
77
+ "epoch": 1.40625,
78
+ "grad_norm": 2.285863161087036,
79
+ "learning_rate": 0.0009296875000000001,
80
+ "loss": 3.6971,
81
+ "step": 45
82
+ },
83
+ {
84
+ "epoch": 1.5625,
85
+ "grad_norm": 2.211660623550415,
86
+ "learning_rate": 0.0009218750000000001,
87
+ "loss": 3.3611,
88
+ "step": 50
89
+ },
90
+ {
91
+ "epoch": 1.71875,
92
+ "grad_norm": 2.331829786300659,
93
+ "learning_rate": 0.0009140625,
94
+ "loss": 3.1108,
95
+ "step": 55
96
+ },
97
+ {
98
+ "epoch": 1.875,
99
+ "grad_norm": 2.799659013748169,
100
+ "learning_rate": 0.00090625,
101
+ "loss": 2.8139,
102
+ "step": 60
103
+ },
104
+ {
105
+ "epoch": 2.0,
106
+ "eval_accuracy": 0.4892156862745098,
107
+ "eval_loss": 2.112804651260376,
108
+ "eval_runtime": 16.5487,
109
+ "eval_samples_per_second": 61.636,
110
+ "eval_steps_per_second": 1.934,
111
+ "step": 64
112
+ },
113
+ {
114
+ "epoch": 2.03125,
115
+ "grad_norm": 2.7438573837280273,
116
+ "learning_rate": 0.0008984375,
117
+ "loss": 2.6103,
118
+ "step": 65
119
+ },
120
+ {
121
+ "epoch": 2.1875,
122
+ "grad_norm": 2.3204867839813232,
123
+ "learning_rate": 0.000890625,
124
+ "loss": 2.0599,
125
+ "step": 70
126
+ },
127
+ {
128
+ "epoch": 2.34375,
129
+ "grad_norm": 2.3990378379821777,
130
+ "learning_rate": 0.0008828125,
131
+ "loss": 1.7052,
132
+ "step": 75
133
+ },
134
+ {
135
+ "epoch": 2.5,
136
+ "grad_norm": 3.4195637702941895,
137
+ "learning_rate": 0.000875,
138
+ "loss": 1.5619,
139
+ "step": 80
140
+ },
141
+ {
142
+ "epoch": 2.65625,
143
+ "grad_norm": 2.6798551082611084,
144
+ "learning_rate": 0.0008671875,
145
+ "loss": 1.4689,
146
+ "step": 85
147
+ },
148
+ {
149
+ "epoch": 2.8125,
150
+ "grad_norm": 3.0105719566345215,
151
+ "learning_rate": 0.000859375,
152
+ "loss": 1.4125,
153
+ "step": 90
154
+ },
155
+ {
156
+ "epoch": 2.96875,
157
+ "grad_norm": 3.218193531036377,
158
+ "learning_rate": 0.0008515625,
159
+ "loss": 1.4505,
160
+ "step": 95
161
+ },
162
+ {
163
+ "epoch": 3.0,
164
+ "eval_accuracy": 0.6843137254901961,
165
+ "eval_loss": 1.226142406463623,
166
+ "eval_runtime": 16.601,
167
+ "eval_samples_per_second": 61.442,
168
+ "eval_steps_per_second": 1.928,
169
+ "step": 96
170
+ },
171
+ {
172
+ "epoch": 3.125,
173
+ "grad_norm": 2.6317319869995117,
174
+ "learning_rate": 0.00084375,
175
+ "loss": 0.7778,
176
+ "step": 100
177
+ },
178
+ {
179
+ "epoch": 3.28125,
180
+ "grad_norm": 2.3407766819000244,
181
+ "learning_rate": 0.0008359375,
182
+ "loss": 0.8178,
183
+ "step": 105
184
+ },
185
+ {
186
+ "epoch": 3.4375,
187
+ "grad_norm": 2.060016632080078,
188
+ "learning_rate": 0.000828125,
189
+ "loss": 0.7545,
190
+ "step": 110
191
+ },
192
+ {
193
+ "epoch": 3.59375,
194
+ "grad_norm": 2.2562413215637207,
195
+ "learning_rate": 0.0008203125,
196
+ "loss": 0.6023,
197
+ "step": 115
198
+ },
199
+ {
200
+ "epoch": 3.75,
201
+ "grad_norm": 2.7784945964813232,
202
+ "learning_rate": 0.0008125000000000001,
203
+ "loss": 0.5268,
204
+ "step": 120
205
+ },
206
+ {
207
+ "epoch": 3.90625,
208
+ "grad_norm": 2.47145676612854,
209
+ "learning_rate": 0.0008046875000000001,
210
+ "loss": 0.5751,
211
+ "step": 125
212
+ },
213
+ {
214
+ "epoch": 4.0,
215
+ "eval_accuracy": 0.7441176470588236,
216
+ "eval_loss": 1.0175817012786865,
217
+ "eval_runtime": 18.5424,
218
+ "eval_samples_per_second": 55.009,
219
+ "eval_steps_per_second": 1.726,
220
+ "step": 128
221
+ },
222
+ {
223
+ "epoch": 4.0625,
224
+ "grad_norm": 1.2707927227020264,
225
+ "learning_rate": 0.0007968750000000001,
226
+ "loss": 0.41,
227
+ "step": 130
228
+ },
229
+ {
230
+ "epoch": 4.21875,
231
+ "grad_norm": 2.2418272495269775,
232
+ "learning_rate": 0.0007890625,
233
+ "loss": 0.2771,
234
+ "step": 135
235
+ },
236
+ {
237
+ "epoch": 4.375,
238
+ "grad_norm": 1.0117669105529785,
239
+ "learning_rate": 0.00078125,
240
+ "loss": 0.2848,
241
+ "step": 140
242
+ },
243
+ {
244
+ "epoch": 4.53125,
245
+ "grad_norm": 1.5163785219192505,
246
+ "learning_rate": 0.0007734375,
247
+ "loss": 0.2322,
248
+ "step": 145
249
+ },
250
+ {
251
+ "epoch": 4.6875,
252
+ "grad_norm": 1.693102478981018,
253
+ "learning_rate": 0.000765625,
254
+ "loss": 0.292,
255
+ "step": 150
256
+ },
257
+ {
258
+ "epoch": 4.84375,
259
+ "grad_norm": 1.6366838216781616,
260
+ "learning_rate": 0.0007578125,
261
+ "loss": 0.2391,
262
+ "step": 155
263
+ },
264
+ {
265
+ "epoch": 5.0,
266
+ "grad_norm": 1.1743065118789673,
267
+ "learning_rate": 0.00075,
268
+ "loss": 0.2265,
269
+ "step": 160
270
+ },
271
+ {
272
+ "epoch": 5.0,
273
+ "eval_accuracy": 0.7558823529411764,
274
+ "eval_loss": 0.8487027287483215,
275
+ "eval_runtime": 20.2945,
276
+ "eval_samples_per_second": 50.26,
277
+ "eval_steps_per_second": 1.577,
278
+ "step": 160
279
+ },
280
+ {
281
+ "epoch": 5.15625,
282
+ "grad_norm": 0.5249314308166504,
283
+ "learning_rate": 0.0007421875,
284
+ "loss": 0.1254,
285
+ "step": 165
286
+ },
287
+ {
288
+ "epoch": 5.3125,
289
+ "grad_norm": 0.41112297773361206,
290
+ "learning_rate": 0.000734375,
291
+ "loss": 0.0917,
292
+ "step": 170
293
+ },
294
+ {
295
+ "epoch": 5.46875,
296
+ "grad_norm": 1.9200881719589233,
297
+ "learning_rate": 0.0007265625,
298
+ "loss": 0.1139,
299
+ "step": 175
300
+ },
301
+ {
302
+ "epoch": 5.625,
303
+ "grad_norm": 0.7506140470504761,
304
+ "learning_rate": 0.00071875,
305
+ "loss": 0.116,
306
+ "step": 180
307
+ },
308
+ {
309
+ "epoch": 5.78125,
310
+ "grad_norm": 1.2240333557128906,
311
+ "learning_rate": 0.0007109375,
312
+ "loss": 0.1251,
313
+ "step": 185
314
+ },
315
+ {
316
+ "epoch": 5.9375,
317
+ "grad_norm": 1.3143774271011353,
318
+ "learning_rate": 0.000703125,
319
+ "loss": 0.0531,
320
+ "step": 190
321
+ },
322
+ {
323
+ "epoch": 6.0,
324
+ "eval_accuracy": 0.8,
325
+ "eval_loss": 0.7608510255813599,
326
+ "eval_runtime": 24.6873,
327
+ "eval_samples_per_second": 41.317,
328
+ "eval_steps_per_second": 1.296,
329
+ "step": 192
330
+ },
331
+ {
332
+ "epoch": 6.09375,
333
+ "grad_norm": 1.9283502101898193,
334
+ "learning_rate": 0.0006953125,
335
+ "loss": 0.1274,
336
+ "step": 195
337
+ },
338
+ {
339
+ "epoch": 6.25,
340
+ "grad_norm": 0.5062114000320435,
341
+ "learning_rate": 0.0006875,
342
+ "loss": 0.0358,
343
+ "step": 200
344
+ },
345
+ {
346
+ "epoch": 6.40625,
347
+ "grad_norm": 1.057132601737976,
348
+ "learning_rate": 0.0006796875000000001,
349
+ "loss": 0.0426,
350
+ "step": 205
351
+ },
352
+ {
353
+ "epoch": 6.5625,
354
+ "grad_norm": 0.2724122107028961,
355
+ "learning_rate": 0.0006718750000000001,
356
+ "loss": 0.0668,
357
+ "step": 210
358
+ },
359
+ {
360
+ "epoch": 6.71875,
361
+ "grad_norm": 0.3335299789905548,
362
+ "learning_rate": 0.0006640625,
363
+ "loss": 0.0838,
364
+ "step": 215
365
+ },
366
+ {
367
+ "epoch": 6.875,
368
+ "grad_norm": 0.5840352177619934,
369
+ "learning_rate": 0.00065625,
370
+ "loss": 0.0411,
371
+ "step": 220
372
+ },
373
+ {
374
+ "epoch": 7.0,
375
+ "eval_accuracy": 0.8029411764705883,
376
+ "eval_loss": 0.7190886735916138,
377
+ "eval_runtime": 18.919,
378
+ "eval_samples_per_second": 53.914,
379
+ "eval_steps_per_second": 1.691,
380
+ "step": 224
381
+ },
382
+ {
383
+ "epoch": 7.03125,
384
+ "grad_norm": 0.6974908709526062,
385
+ "learning_rate": 0.0006484375,
386
+ "loss": 0.0412,
387
+ "step": 225
388
+ },
389
+ {
390
+ "epoch": 7.1875,
391
+ "grad_norm": 0.27331459522247314,
392
+ "learning_rate": 0.000640625,
393
+ "loss": 0.0238,
394
+ "step": 230
395
+ },
396
+ {
397
+ "epoch": 7.34375,
398
+ "grad_norm": 0.26315683126449585,
399
+ "learning_rate": 0.0006328125,
400
+ "loss": 0.0181,
401
+ "step": 235
402
+ },
403
+ {
404
+ "epoch": 7.5,
405
+ "grad_norm": 0.979246199131012,
406
+ "learning_rate": 0.000625,
407
+ "loss": 0.0368,
408
+ "step": 240
409
+ },
410
+ {
411
+ "epoch": 7.65625,
412
+ "grad_norm": 0.18979792296886444,
413
+ "learning_rate": 0.0006171875,
414
+ "loss": 0.0293,
415
+ "step": 245
416
+ },
417
+ {
418
+ "epoch": 7.8125,
419
+ "grad_norm": 2.098189115524292,
420
+ "learning_rate": 0.000609375,
421
+ "loss": 0.0263,
422
+ "step": 250
423
+ },
424
+ {
425
+ "epoch": 7.96875,
426
+ "grad_norm": 0.20951713621616364,
427
+ "learning_rate": 0.0006015625,
428
+ "loss": 0.0351,
429
+ "step": 255
430
+ },
431
+ {
432
+ "epoch": 8.0,
433
+ "eval_accuracy": 0.807843137254902,
434
+ "eval_loss": 0.698701798915863,
435
+ "eval_runtime": 20.5747,
436
+ "eval_samples_per_second": 49.575,
437
+ "eval_steps_per_second": 1.555,
438
+ "step": 256
439
+ },
440
+ {
441
+ "epoch": 8.125,
442
+ "grad_norm": 0.08398638665676117,
443
+ "learning_rate": 0.00059375,
444
+ "loss": 0.0138,
445
+ "step": 260
446
+ },
447
+ {
448
+ "epoch": 8.28125,
449
+ "grad_norm": 0.94996577501297,
450
+ "learning_rate": 0.0005859375,
451
+ "loss": 0.0176,
452
+ "step": 265
453
+ },
454
+ {
455
+ "epoch": 8.4375,
456
+ "grad_norm": 0.14498768746852875,
457
+ "learning_rate": 0.000578125,
458
+ "loss": 0.0149,
459
+ "step": 270
460
+ },
461
+ {
462
+ "epoch": 8.59375,
463
+ "grad_norm": 0.1302383691072464,
464
+ "learning_rate": 0.0005703125,
465
+ "loss": 0.0146,
466
+ "step": 275
467
+ },
468
+ {
469
+ "epoch": 8.75,
470
+ "grad_norm": 0.3484581708908081,
471
+ "learning_rate": 0.0005625000000000001,
472
+ "loss": 0.0183,
473
+ "step": 280
474
+ },
475
+ {
476
+ "epoch": 8.90625,
477
+ "grad_norm": 0.1543685644865036,
478
+ "learning_rate": 0.0005546875000000001,
479
+ "loss": 0.0107,
480
+ "step": 285
481
+ },
482
+ {
483
+ "epoch": 9.0,
484
+ "eval_accuracy": 0.8225490196078431,
485
+ "eval_loss": 0.6843494176864624,
486
+ "eval_runtime": 16.0725,
487
+ "eval_samples_per_second": 63.462,
488
+ "eval_steps_per_second": 1.991,
489
+ "step": 288
490
+ },
491
+ {
492
+ "epoch": 9.0625,
493
+ "grad_norm": 0.9732298851013184,
494
+ "learning_rate": 0.000546875,
495
+ "loss": 0.0156,
496
+ "step": 290
497
+ },
498
+ {
499
+ "epoch": 9.21875,
500
+ "grad_norm": 0.09730440378189087,
501
+ "learning_rate": 0.0005390625,
502
+ "loss": 0.0114,
503
+ "step": 295
504
+ },
505
+ {
506
+ "epoch": 9.375,
507
+ "grad_norm": 0.41419529914855957,
508
+ "learning_rate": 0.00053125,
509
+ "loss": 0.0101,
510
+ "step": 300
511
+ },
512
+ {
513
+ "epoch": 9.53125,
514
+ "grad_norm": 0.055323634296655655,
515
+ "learning_rate": 0.0005234375,
516
+ "loss": 0.0074,
517
+ "step": 305
518
+ },
519
+ {
520
+ "epoch": 9.6875,
521
+ "grad_norm": 0.07538346946239471,
522
+ "learning_rate": 0.000515625,
523
+ "loss": 0.0051,
524
+ "step": 310
525
+ },
526
+ {
527
+ "epoch": 9.84375,
528
+ "grad_norm": 0.037017084658145905,
529
+ "learning_rate": 0.0005078125,
530
+ "loss": 0.0133,
531
+ "step": 315
532
+ },
533
+ {
534
+ "epoch": 10.0,
535
+ "grad_norm": 0.05076463520526886,
536
+ "learning_rate": 0.0005,
537
+ "loss": 0.0094,
538
+ "step": 320
539
+ },
540
+ {
541
+ "epoch": 10.0,
542
+ "eval_accuracy": 0.8343137254901961,
543
+ "eval_loss": 0.6314178109169006,
544
+ "eval_runtime": 16.9633,
545
+ "eval_samples_per_second": 60.13,
546
+ "eval_steps_per_second": 1.886,
547
+ "step": 320
548
+ },
549
+ {
550
+ "epoch": 10.15625,
551
+ "grad_norm": 0.04146264120936394,
552
+ "learning_rate": 0.0004921875,
553
+ "loss": 0.0045,
554
+ "step": 325
555
+ },
556
+ {
557
+ "epoch": 10.3125,
558
+ "grad_norm": 1.678152084350586,
559
+ "learning_rate": 0.000484375,
560
+ "loss": 0.0111,
561
+ "step": 330
562
+ },
563
+ {
564
+ "epoch": 10.46875,
565
+ "grad_norm": 0.08414560556411743,
566
+ "learning_rate": 0.0004765625,
567
+ "loss": 0.004,
568
+ "step": 335
569
+ },
570
+ {
571
+ "epoch": 10.625,
572
+ "grad_norm": 0.062152933329343796,
573
+ "learning_rate": 0.00046875,
574
+ "loss": 0.0058,
575
+ "step": 340
576
+ },
577
+ {
578
+ "epoch": 10.78125,
579
+ "grad_norm": 0.18813878297805786,
580
+ "learning_rate": 0.00046093750000000003,
581
+ "loss": 0.0059,
582
+ "step": 345
583
+ },
584
+ {
585
+ "epoch": 10.9375,
586
+ "grad_norm": 0.03264420107007027,
587
+ "learning_rate": 0.000453125,
588
+ "loss": 0.0081,
589
+ "step": 350
590
+ },
591
+ {
592
+ "epoch": 11.0,
593
+ "eval_accuracy": 0.8352941176470589,
594
+ "eval_loss": 0.6319591999053955,
595
+ "eval_runtime": 16.0721,
596
+ "eval_samples_per_second": 63.464,
597
+ "eval_steps_per_second": 1.991,
598
+ "step": 352
599
+ },
600
+ {
601
+ "epoch": 11.09375,
602
+ "grad_norm": 0.01650502346456051,
603
+ "learning_rate": 0.0004453125,
604
+ "loss": 0.0058,
605
+ "step": 355
606
+ },
607
+ {
608
+ "epoch": 11.25,
609
+ "grad_norm": 0.03100210428237915,
610
+ "learning_rate": 0.0004375,
611
+ "loss": 0.0032,
612
+ "step": 360
613
+ },
614
+ {
615
+ "epoch": 11.40625,
616
+ "grad_norm": 0.31530651450157166,
617
+ "learning_rate": 0.0004296875,
618
+ "loss": 0.0055,
619
+ "step": 365
620
+ },
621
+ {
622
+ "epoch": 11.5625,
623
+ "grad_norm": 0.018279677256941795,
624
+ "learning_rate": 0.000421875,
625
+ "loss": 0.0042,
626
+ "step": 370
627
+ },
628
+ {
629
+ "epoch": 11.71875,
630
+ "grad_norm": 0.039065517485141754,
631
+ "learning_rate": 0.0004140625,
632
+ "loss": 0.004,
633
+ "step": 375
634
+ },
635
+ {
636
+ "epoch": 11.875,
637
+ "grad_norm": 0.17956194281578064,
638
+ "learning_rate": 0.00040625000000000004,
639
+ "loss": 0.0053,
640
+ "step": 380
641
+ },
642
+ {
643
+ "epoch": 12.0,
644
+ "eval_accuracy": 0.8352941176470589,
645
+ "eval_loss": 0.6048569679260254,
646
+ "eval_runtime": 16.0302,
647
+ "eval_samples_per_second": 63.63,
648
+ "eval_steps_per_second": 1.996,
649
+ "step": 384
650
+ },
651
+ {
652
+ "epoch": 12.03125,
653
+ "grad_norm": 0.0491081103682518,
654
+ "learning_rate": 0.00039843750000000003,
655
+ "loss": 0.004,
656
+ "step": 385
657
+ },
658
+ {
659
+ "epoch": 12.1875,
660
+ "grad_norm": 0.06726662814617157,
661
+ "learning_rate": 0.000390625,
662
+ "loss": 0.0032,
663
+ "step": 390
664
+ },
665
+ {
666
+ "epoch": 12.34375,
667
+ "grad_norm": 0.0226299911737442,
668
+ "learning_rate": 0.0003828125,
669
+ "loss": 0.0027,
670
+ "step": 395
671
+ },
672
+ {
673
+ "epoch": 12.5,
674
+ "grad_norm": 0.021714534610509872,
675
+ "learning_rate": 0.000375,
676
+ "loss": 0.0029,
677
+ "step": 400
678
+ },
679
+ {
680
+ "epoch": 12.65625,
681
+ "grad_norm": 0.07769683748483658,
682
+ "learning_rate": 0.0003671875,
683
+ "loss": 0.0034,
684
+ "step": 405
685
+ },
686
+ {
687
+ "epoch": 12.8125,
688
+ "grad_norm": 0.017162494361400604,
689
+ "learning_rate": 0.000359375,
690
+ "loss": 0.0029,
691
+ "step": 410
692
+ },
693
+ {
694
+ "epoch": 12.96875,
695
+ "grad_norm": 0.08164256066083908,
696
+ "learning_rate": 0.0003515625,
697
+ "loss": 0.0048,
698
+ "step": 415
699
+ },
700
+ {
701
+ "epoch": 13.0,
702
+ "eval_accuracy": 0.8372549019607843,
703
+ "eval_loss": 0.5961340665817261,
704
+ "eval_runtime": 18.3515,
705
+ "eval_samples_per_second": 55.581,
706
+ "eval_steps_per_second": 1.744,
707
+ "step": 416
708
+ },
709
+ {
710
+ "epoch": 13.125,
711
+ "grad_norm": 0.05423242226243019,
712
+ "learning_rate": 0.00034375,
713
+ "loss": 0.0024,
714
+ "step": 420
715
+ },
716
+ {
717
+ "epoch": 13.28125,
718
+ "grad_norm": 0.19008223712444305,
719
+ "learning_rate": 0.00033593750000000003,
720
+ "loss": 0.0083,
721
+ "step": 425
722
+ },
723
+ {
724
+ "epoch": 13.4375,
725
+ "grad_norm": 0.0373542457818985,
726
+ "learning_rate": 0.000328125,
727
+ "loss": 0.002,
728
+ "step": 430
729
+ },
730
+ {
731
+ "epoch": 13.59375,
732
+ "grad_norm": 0.014899961650371552,
733
+ "learning_rate": 0.0003203125,
734
+ "loss": 0.0029,
735
+ "step": 435
736
+ },
737
+ {
738
+ "epoch": 13.75,
739
+ "grad_norm": 0.03342936560511589,
740
+ "learning_rate": 0.0003125,
741
+ "loss": 0.0031,
742
+ "step": 440
743
+ },
744
+ {
745
+ "epoch": 13.90625,
746
+ "grad_norm": 0.018663976341485977,
747
+ "learning_rate": 0.0003046875,
748
+ "loss": 0.0024,
749
+ "step": 445
750
+ },
751
+ {
752
+ "epoch": 14.0,
753
+ "eval_accuracy": 0.8470588235294118,
754
+ "eval_loss": 0.588026225566864,
755
+ "eval_runtime": 16.6123,
756
+ "eval_samples_per_second": 61.4,
757
+ "eval_steps_per_second": 1.926,
758
+ "step": 448
759
+ },
760
+ {
761
+ "epoch": 14.0625,
762
+ "grad_norm": 0.03626991808414459,
763
+ "learning_rate": 0.000296875,
764
+ "loss": 0.004,
765
+ "step": 450
766
+ },
767
+ {
768
+ "epoch": 14.21875,
769
+ "grad_norm": 0.021257249638438225,
770
+ "learning_rate": 0.0002890625,
771
+ "loss": 0.0026,
772
+ "step": 455
773
+ },
774
+ {
775
+ "epoch": 14.375,
776
+ "grad_norm": 0.032649360597133636,
777
+ "learning_rate": 0.00028125000000000003,
778
+ "loss": 0.002,
779
+ "step": 460
780
+ },
781
+ {
782
+ "epoch": 14.53125,
783
+ "grad_norm": 0.022741030901670456,
784
+ "learning_rate": 0.0002734375,
785
+ "loss": 0.007,
786
+ "step": 465
787
+ },
788
+ {
789
+ "epoch": 14.6875,
790
+ "grad_norm": 0.020442800596356392,
791
+ "learning_rate": 0.000265625,
792
+ "loss": 0.0023,
793
+ "step": 470
794
+ },
795
+ {
796
+ "epoch": 14.84375,
797
+ "grad_norm": 0.022834857925772667,
798
+ "learning_rate": 0.0002578125,
799
+ "loss": 0.0031,
800
+ "step": 475
801
+ },
802
+ {
803
+ "epoch": 15.0,
804
+ "grad_norm": 0.014007111079990864,
805
+ "learning_rate": 0.00025,
806
+ "loss": 0.0028,
807
+ "step": 480
808
+ },
809
+ {
810
+ "epoch": 15.0,
811
+ "eval_accuracy": 0.8441176470588235,
812
+ "eval_loss": 0.5926622748374939,
813
+ "eval_runtime": 16.2356,
814
+ "eval_samples_per_second": 62.825,
815
+ "eval_steps_per_second": 1.971,
816
+ "step": 480
817
+ },
818
+ {
819
+ "epoch": 15.15625,
820
+ "grad_norm": 0.00949984509497881,
821
+ "learning_rate": 0.0002421875,
822
+ "loss": 0.0023,
823
+ "step": 485
824
+ },
825
+ {
826
+ "epoch": 15.3125,
827
+ "grad_norm": 0.04143200442194939,
828
+ "learning_rate": 0.000234375,
829
+ "loss": 0.0021,
830
+ "step": 490
831
+ },
832
+ {
833
+ "epoch": 15.46875,
834
+ "grad_norm": 0.012401225045323372,
835
+ "learning_rate": 0.0002265625,
836
+ "loss": 0.0021,
837
+ "step": 495
838
+ },
839
+ {
840
+ "epoch": 15.625,
841
+ "grad_norm": 0.040582917630672455,
842
+ "learning_rate": 0.00021875,
843
+ "loss": 0.0031,
844
+ "step": 500
845
+ },
846
+ {
847
+ "epoch": 15.78125,
848
+ "grad_norm": 0.025907032191753387,
849
+ "learning_rate": 0.0002109375,
850
+ "loss": 0.0021,
851
+ "step": 505
852
+ },
853
+ {
854
+ "epoch": 15.9375,
855
+ "grad_norm": 0.008175536058843136,
856
+ "learning_rate": 0.00020312500000000002,
857
+ "loss": 0.0023,
858
+ "step": 510
859
+ },
860
+ {
861
+ "epoch": 16.0,
862
+ "eval_accuracy": 0.8519607843137255,
863
+ "eval_loss": 0.5878445506095886,
864
+ "eval_runtime": 16.1518,
865
+ "eval_samples_per_second": 63.151,
866
+ "eval_steps_per_second": 1.981,
867
+ "step": 512
868
+ },
869
+ {
870
+ "epoch": 16.09375,
871
+ "grad_norm": 0.3129185140132904,
872
+ "learning_rate": 0.0001953125,
873
+ "loss": 0.0044,
874
+ "step": 515
875
+ },
876
+ {
877
+ "epoch": 16.25,
878
+ "grad_norm": 0.030808325856924057,
879
+ "learning_rate": 0.0001875,
880
+ "loss": 0.0036,
881
+ "step": 520
882
+ },
883
+ {
884
+ "epoch": 16.40625,
885
+ "grad_norm": 0.019886957481503487,
886
+ "learning_rate": 0.0001796875,
887
+ "loss": 0.0026,
888
+ "step": 525
889
+ },
890
+ {
891
+ "epoch": 16.5625,
892
+ "grad_norm": 0.019268082454800606,
893
+ "learning_rate": 0.000171875,
894
+ "loss": 0.0034,
895
+ "step": 530
896
+ },
897
+ {
898
+ "epoch": 16.71875,
899
+ "grad_norm": 0.025241246446967125,
900
+ "learning_rate": 0.0001640625,
901
+ "loss": 0.0019,
902
+ "step": 535
903
+ },
904
+ {
905
+ "epoch": 16.875,
906
+ "grad_norm": 0.01479440089315176,
907
+ "learning_rate": 0.00015625,
908
+ "loss": 0.0027,
909
+ "step": 540
910
+ },
911
+ {
912
+ "epoch": 17.0,
913
+ "eval_accuracy": 0.8470588235294118,
914
+ "eval_loss": 0.5872153043746948,
915
+ "eval_runtime": 16.056,
916
+ "eval_samples_per_second": 63.528,
917
+ "eval_steps_per_second": 1.993,
918
+ "step": 544
919
+ },
920
+ {
921
+ "epoch": 17.03125,
922
+ "grad_norm": 0.01011387724429369,
923
+ "learning_rate": 0.0001484375,
924
+ "loss": 0.0019,
925
+ "step": 545
926
+ },
927
+ {
928
+ "epoch": 17.1875,
929
+ "grad_norm": 0.020896941423416138,
930
+ "learning_rate": 0.00014062500000000002,
931
+ "loss": 0.0022,
932
+ "step": 550
933
+ },
934
+ {
935
+ "epoch": 17.34375,
936
+ "grad_norm": 0.040105391293764114,
937
+ "learning_rate": 0.0001328125,
938
+ "loss": 0.002,
939
+ "step": 555
940
+ },
941
+ {
942
+ "epoch": 17.5,
943
+ "grad_norm": 0.016236811876296997,
944
+ "learning_rate": 0.000125,
945
+ "loss": 0.0024,
946
+ "step": 560
947
+ },
948
+ {
949
+ "epoch": 17.65625,
950
+ "grad_norm": 0.010203810408711433,
951
+ "learning_rate": 0.0001171875,
952
+ "loss": 0.002,
953
+ "step": 565
954
+ },
955
+ {
956
+ "epoch": 17.8125,
957
+ "grad_norm": 0.01675267145037651,
958
+ "learning_rate": 0.000109375,
959
+ "loss": 0.0019,
960
+ "step": 570
961
+ },
962
+ {
963
+ "epoch": 17.96875,
964
+ "grad_norm": 0.08755680918693542,
965
+ "learning_rate": 0.00010156250000000001,
966
+ "loss": 0.0028,
967
+ "step": 575
968
+ },
969
+ {
970
+ "epoch": 18.0,
971
+ "eval_accuracy": 0.8450980392156863,
972
+ "eval_loss": 0.5891793966293335,
973
+ "eval_runtime": 15.883,
974
+ "eval_samples_per_second": 64.22,
975
+ "eval_steps_per_second": 2.015,
976
+ "step": 576
977
+ },
978
+ {
979
+ "epoch": 18.125,
980
+ "grad_norm": 0.060470979660749435,
981
+ "learning_rate": 9.375e-05,
982
+ "loss": 0.003,
983
+ "step": 580
984
+ },
985
+ {
986
+ "epoch": 18.28125,
987
+ "grad_norm": 0.02452988736331463,
988
+ "learning_rate": 8.59375e-05,
989
+ "loss": 0.0017,
990
+ "step": 585
991
+ },
992
+ {
993
+ "epoch": 18.4375,
994
+ "grad_norm": 0.02058909274637699,
995
+ "learning_rate": 7.8125e-05,
996
+ "loss": 0.002,
997
+ "step": 590
998
+ },
999
+ {
1000
+ "epoch": 18.59375,
1001
+ "grad_norm": 0.01303939614444971,
1002
+ "learning_rate": 7.031250000000001e-05,
1003
+ "loss": 0.0025,
1004
+ "step": 595
1005
+ },
1006
+ {
1007
+ "epoch": 18.75,
1008
+ "grad_norm": 0.006279917433857918,
1009
+ "learning_rate": 6.25e-05,
1010
+ "loss": 0.0027,
1011
+ "step": 600
1012
+ },
1013
+ {
1014
+ "epoch": 18.90625,
1015
+ "grad_norm": 0.022672630846500397,
1016
+ "learning_rate": 5.46875e-05,
1017
+ "loss": 0.002,
1018
+ "step": 605
1019
+ },
1020
+ {
1021
+ "epoch": 19.0,
1022
+ "eval_accuracy": 0.8411764705882353,
1023
+ "eval_loss": 0.5932831764221191,
1024
+ "eval_runtime": 16.4628,
1025
+ "eval_samples_per_second": 61.958,
1026
+ "eval_steps_per_second": 1.944,
1027
+ "step": 608
1028
+ },
1029
+ {
1030
+ "epoch": 19.0625,
1031
+ "grad_norm": 0.15350750088691711,
1032
+ "learning_rate": 4.6875e-05,
1033
+ "loss": 0.0034,
1034
+ "step": 610
1035
+ },
1036
+ {
1037
+ "epoch": 19.21875,
1038
+ "grad_norm": 0.01092343870550394,
1039
+ "learning_rate": 3.90625e-05,
1040
+ "loss": 0.002,
1041
+ "step": 615
1042
+ },
1043
+ {
1044
+ "epoch": 19.375,
1045
+ "grad_norm": 0.008441799320280552,
1046
+ "learning_rate": 3.125e-05,
1047
+ "loss": 0.0022,
1048
+ "step": 620
1049
+ },
1050
+ {
1051
+ "epoch": 19.53125,
1052
+ "grad_norm": 0.012427592650055885,
1053
+ "learning_rate": 2.34375e-05,
1054
+ "loss": 0.0026,
1055
+ "step": 625
1056
+ },
1057
+ {
1058
+ "epoch": 19.6875,
1059
+ "grad_norm": 0.019600288942456245,
1060
+ "learning_rate": 1.5625e-05,
1061
+ "loss": 0.0016,
1062
+ "step": 630
1063
+ },
1064
+ {
1065
+ "epoch": 19.84375,
1066
+ "grad_norm": 0.0400865413248539,
1067
+ "learning_rate": 7.8125e-06,
1068
+ "loss": 0.0031,
1069
+ "step": 635
1070
+ },
1071
+ {
1072
+ "epoch": 20.0,
1073
+ "grad_norm": 0.03250521048903465,
1074
+ "learning_rate": 0.0,
1075
+ "loss": 0.0017,
1076
+ "step": 640
1077
+ },
1078
+ {
1079
+ "epoch": 20.0,
1080
+ "eval_accuracy": 0.85,
1081
+ "eval_loss": 0.5915272235870361,
1082
+ "eval_runtime": 17.7209,
1083
+ "eval_samples_per_second": 57.559,
1084
+ "eval_steps_per_second": 1.806,
1085
+ "step": 640
1086
+ },
1087
+ {
1088
+ "epoch": 20.0,
1089
+ "step": 640,
1090
+ "total_flos": 4.36977436041216e+17,
1091
+ "train_loss": 0.5368185924002319,
1092
+ "train_runtime": 902.6693,
1093
+ "train_samples_per_second": 22.6,
1094
+ "train_steps_per_second": 0.709
1095
+ }
1096
+ ],
1097
+ "logging_steps": 5,
1098
+ "max_steps": 640,
1099
+ "num_input_tokens_seen": 0,
1100
+ "num_train_epochs": 20,
1101
+ "save_steps": 500,
1102
+ "stateful_callbacks": {
1103
+ "TrainerControl": {
1104
+ "args": {
1105
+ "should_epoch_stop": false,
1106
+ "should_evaluate": false,
1107
+ "should_log": false,
1108
+ "should_save": true,
1109
+ "should_training_stop": true
1110
+ },
1111
+ "attributes": {}
1112
+ }
1113
+ },
1114
+ "total_flos": 4.36977436041216e+17,
1115
+ "train_batch_size": 32,
1116
+ "trial_name": null,
1117
+ "trial_params": null
1118
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:672d131f53e4147ca9ceb033df2f0fab539acde03a45ba383fa02daeb0f762a2
3
+ size 5304