motheecreator commited on
Commit
ea3ea91
1 Parent(s): ad58bc1

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.7306590257879656
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [motheecreator/vit-Facial-Expression-Recognition](https://huggingface.co/motheecreator/vit-Facial-Expression-Recognition) on the image_folder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 1.1663
36
- - Accuracy: 0.7307
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.7444126074498567
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [motheecreator/vit-Facial-Expression-Recognition](https://huggingface.co/motheecreator/vit-Facial-Expression-Recognition) on the image_folder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.7038
36
+ - Accuracy: 0.7444
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 5.0,
3
  "eval_accuracy": 0.7444126074498567,
4
  "eval_loss": 0.7038247585296631,
5
- "eval_runtime": 48.5993,
6
- "eval_samples_per_second": 107.718,
7
- "eval_steps_per_second": 13.478,
8
- "total_flos": 8.109125174606561e+18,
9
- "train_loss": 0.5082513862064489,
10
- "train_runtime": 2793.8499,
11
- "train_samples_per_second": 37.468,
12
- "train_steps_per_second": 1.17
13
  }
 
1
  {
2
+ "epoch": 10.0,
3
  "eval_accuracy": 0.7444126074498567,
4
  "eval_loss": 0.7038247585296631,
5
+ "eval_runtime": 50.1881,
6
+ "eval_samples_per_second": 104.308,
7
+ "eval_steps_per_second": 13.051,
8
+ "total_flos": 1.6218250349213123e+19,
9
+ "train_loss": 0.06548295821026195,
10
+ "train_runtime": 2722.3253,
11
+ "train_samples_per_second": 76.905,
12
+ "train_steps_per_second": 2.402
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
  "eval_accuracy": 0.7444126074498567,
4
  "eval_loss": 0.7038247585296631,
5
- "eval_runtime": 48.5993,
6
- "eval_samples_per_second": 107.718,
7
- "eval_steps_per_second": 13.478
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
  "eval_accuracy": 0.7444126074498567,
4
  "eval_loss": 0.7038247585296631,
5
+ "eval_runtime": 50.1881,
6
+ "eval_samples_per_second": 104.308,
7
+ "eval_steps_per_second": 13.051
8
  }
runs/May25_20-15-01_5f59a01ef625/events.out.tfevents.1716670942.5f59a01ef625.42.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:979557d094aeaad0e1d93f5fe166f6b99b0a810c97ce5ff3d1b02b787232ba02
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 8.109125174606561e+18,
4
- "train_loss": 0.5082513862064489,
5
- "train_runtime": 2793.8499,
6
- "train_samples_per_second": 37.468,
7
- "train_steps_per_second": 1.17
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 1.6218250349213123e+19,
4
+ "train_loss": 0.06548295821026195,
5
+ "train_runtime": 2722.3253,
6
+ "train_samples_per_second": 76.905,
7
+ "train_steps_per_second": 2.402
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7444126074498567,
3
  "best_model_checkpoint": "vit-Facial-Expression-Recognition/checkpoint-1962",
4
- "epoch": 4.9980894153611,
5
  "eval_steps": 500,
6
- "global_step": 3270,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2016,21 +2016,2028 @@
2016
  "step": 3270
2017
  },
2018
  {
2019
- "epoch": 5.0,
2020
- "step": 3270,
2021
- "total_flos": 8.109125174606561e+18,
2022
- "train_loss": 0.5082513862064489,
2023
- "train_runtime": 2793.8499,
2024
- "train_samples_per_second": 37.468,
2025
- "train_steps_per_second": 1.17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026
  }
2027
  ],
2028
  "logging_steps": 10,
2029
- "max_steps": 3270,
2030
  "num_input_tokens_seen": 0,
2031
- "num_train_epochs": 5,
2032
  "save_steps": 500,
2033
- "total_flos": 8.109125174606561e+18,
2034
  "train_batch_size": 8,
2035
  "trial_name": null,
2036
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7444126074498567,
3
  "best_model_checkpoint": "vit-Facial-Expression-Recognition/checkpoint-1962",
4
+ "epoch": 9.9980894153611,
5
  "eval_steps": 500,
6
+ "global_step": 6540,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2016
  "step": 3270
2017
  },
2018
  {
2019
+ "epoch": 5.02,
2020
+ "learning_rate": 2.7692830445124026e-05,
2021
+ "loss": 0.1929,
2022
+ "step": 3280
2023
+ },
2024
+ {
2025
+ "epoch": 5.03,
2026
+ "learning_rate": 2.760788311247027e-05,
2027
+ "loss": 0.1936,
2028
+ "step": 3290
2029
+ },
2030
+ {
2031
+ "epoch": 5.05,
2032
+ "learning_rate": 2.7522935779816515e-05,
2033
+ "loss": 0.1636,
2034
+ "step": 3300
2035
+ },
2036
+ {
2037
+ "epoch": 5.06,
2038
+ "learning_rate": 2.743798844716276e-05,
2039
+ "loss": 0.1827,
2040
+ "step": 3310
2041
+ },
2042
+ {
2043
+ "epoch": 5.08,
2044
+ "learning_rate": 2.7353041114509004e-05,
2045
+ "loss": 0.2437,
2046
+ "step": 3320
2047
+ },
2048
+ {
2049
+ "epoch": 5.09,
2050
+ "learning_rate": 2.7268093781855254e-05,
2051
+ "loss": 0.2142,
2052
+ "step": 3330
2053
+ },
2054
+ {
2055
+ "epoch": 5.11,
2056
+ "learning_rate": 2.7183146449201497e-05,
2057
+ "loss": 0.2637,
2058
+ "step": 3340
2059
+ },
2060
+ {
2061
+ "epoch": 5.12,
2062
+ "learning_rate": 2.709819911654774e-05,
2063
+ "loss": 0.213,
2064
+ "step": 3350
2065
+ },
2066
+ {
2067
+ "epoch": 5.14,
2068
+ "learning_rate": 2.701325178389399e-05,
2069
+ "loss": 0.2534,
2070
+ "step": 3360
2071
+ },
2072
+ {
2073
+ "epoch": 5.15,
2074
+ "learning_rate": 2.6928304451240233e-05,
2075
+ "loss": 0.2819,
2076
+ "step": 3370
2077
+ },
2078
+ {
2079
+ "epoch": 5.17,
2080
+ "learning_rate": 2.6843357118586476e-05,
2081
+ "loss": 0.2568,
2082
+ "step": 3380
2083
+ },
2084
+ {
2085
+ "epoch": 5.18,
2086
+ "learning_rate": 2.6758409785932726e-05,
2087
+ "loss": 0.2391,
2088
+ "step": 3390
2089
+ },
2090
+ {
2091
+ "epoch": 5.2,
2092
+ "learning_rate": 2.667346245327897e-05,
2093
+ "loss": 0.2604,
2094
+ "step": 3400
2095
+ },
2096
+ {
2097
+ "epoch": 5.21,
2098
+ "learning_rate": 2.6588515120625212e-05,
2099
+ "loss": 0.2849,
2100
+ "step": 3410
2101
+ },
2102
+ {
2103
+ "epoch": 5.23,
2104
+ "learning_rate": 2.6503567787971462e-05,
2105
+ "loss": 0.2233,
2106
+ "step": 3420
2107
+ },
2108
+ {
2109
+ "epoch": 5.24,
2110
+ "learning_rate": 2.6418620455317705e-05,
2111
+ "loss": 0.2154,
2112
+ "step": 3430
2113
+ },
2114
+ {
2115
+ "epoch": 5.26,
2116
+ "learning_rate": 2.6333673122663948e-05,
2117
+ "loss": 0.2891,
2118
+ "step": 3440
2119
+ },
2120
+ {
2121
+ "epoch": 5.28,
2122
+ "learning_rate": 2.624872579001019e-05,
2123
+ "loss": 0.217,
2124
+ "step": 3450
2125
+ },
2126
+ {
2127
+ "epoch": 5.29,
2128
+ "learning_rate": 2.616377845735644e-05,
2129
+ "loss": 0.2627,
2130
+ "step": 3460
2131
+ },
2132
+ {
2133
+ "epoch": 5.31,
2134
+ "learning_rate": 2.6078831124702684e-05,
2135
+ "loss": 0.2276,
2136
+ "step": 3470
2137
+ },
2138
+ {
2139
+ "epoch": 5.32,
2140
+ "learning_rate": 2.5993883792048927e-05,
2141
+ "loss": 0.2119,
2142
+ "step": 3480
2143
+ },
2144
+ {
2145
+ "epoch": 5.34,
2146
+ "learning_rate": 2.5908936459395177e-05,
2147
+ "loss": 0.2302,
2148
+ "step": 3490
2149
+ },
2150
+ {
2151
+ "epoch": 5.35,
2152
+ "learning_rate": 2.582398912674142e-05,
2153
+ "loss": 0.284,
2154
+ "step": 3500
2155
+ },
2156
+ {
2157
+ "epoch": 5.37,
2158
+ "learning_rate": 2.5739041794087666e-05,
2159
+ "loss": 0.2456,
2160
+ "step": 3510
2161
+ },
2162
+ {
2163
+ "epoch": 5.38,
2164
+ "learning_rate": 2.5654094461433913e-05,
2165
+ "loss": 0.2087,
2166
+ "step": 3520
2167
+ },
2168
+ {
2169
+ "epoch": 5.4,
2170
+ "learning_rate": 2.556914712878016e-05,
2171
+ "loss": 0.282,
2172
+ "step": 3530
2173
+ },
2174
+ {
2175
+ "epoch": 5.41,
2176
+ "learning_rate": 2.5484199796126402e-05,
2177
+ "loss": 0.213,
2178
+ "step": 3540
2179
+ },
2180
+ {
2181
+ "epoch": 5.43,
2182
+ "learning_rate": 2.539925246347265e-05,
2183
+ "loss": 0.207,
2184
+ "step": 3550
2185
+ },
2186
+ {
2187
+ "epoch": 5.44,
2188
+ "learning_rate": 2.5314305130818895e-05,
2189
+ "loss": 0.2326,
2190
+ "step": 3560
2191
+ },
2192
+ {
2193
+ "epoch": 5.46,
2194
+ "learning_rate": 2.5229357798165138e-05,
2195
+ "loss": 0.2557,
2196
+ "step": 3570
2197
+ },
2198
+ {
2199
+ "epoch": 5.47,
2200
+ "learning_rate": 2.5144410465511388e-05,
2201
+ "loss": 0.2524,
2202
+ "step": 3580
2203
+ },
2204
+ {
2205
+ "epoch": 5.49,
2206
+ "learning_rate": 2.505946313285763e-05,
2207
+ "loss": 0.3032,
2208
+ "step": 3590
2209
+ },
2210
+ {
2211
+ "epoch": 5.5,
2212
+ "learning_rate": 2.4974515800203874e-05,
2213
+ "loss": 0.3316,
2214
+ "step": 3600
2215
+ },
2216
+ {
2217
+ "epoch": 5.52,
2218
+ "learning_rate": 2.488956846755012e-05,
2219
+ "loss": 0.2447,
2220
+ "step": 3610
2221
+ },
2222
+ {
2223
+ "epoch": 5.53,
2224
+ "learning_rate": 2.4804621134896367e-05,
2225
+ "loss": 0.3041,
2226
+ "step": 3620
2227
+ },
2228
+ {
2229
+ "epoch": 5.55,
2230
+ "learning_rate": 2.471967380224261e-05,
2231
+ "loss": 0.1924,
2232
+ "step": 3630
2233
+ },
2234
+ {
2235
+ "epoch": 5.57,
2236
+ "learning_rate": 2.4634726469588856e-05,
2237
+ "loss": 0.2868,
2238
+ "step": 3640
2239
+ },
2240
+ {
2241
+ "epoch": 5.58,
2242
+ "learning_rate": 2.4549779136935103e-05,
2243
+ "loss": 0.2278,
2244
+ "step": 3650
2245
+ },
2246
+ {
2247
+ "epoch": 5.6,
2248
+ "learning_rate": 2.4464831804281346e-05,
2249
+ "loss": 0.3065,
2250
+ "step": 3660
2251
+ },
2252
+ {
2253
+ "epoch": 5.61,
2254
+ "learning_rate": 2.4379884471627592e-05,
2255
+ "loss": 0.2801,
2256
+ "step": 3670
2257
+ },
2258
+ {
2259
+ "epoch": 5.63,
2260
+ "learning_rate": 2.4294937138973835e-05,
2261
+ "loss": 0.2434,
2262
+ "step": 3680
2263
+ },
2264
+ {
2265
+ "epoch": 5.64,
2266
+ "learning_rate": 2.4209989806320082e-05,
2267
+ "loss": 0.2846,
2268
+ "step": 3690
2269
+ },
2270
+ {
2271
+ "epoch": 5.66,
2272
+ "learning_rate": 2.4125042473666328e-05,
2273
+ "loss": 0.2783,
2274
+ "step": 3700
2275
+ },
2276
+ {
2277
+ "epoch": 5.67,
2278
+ "learning_rate": 2.404009514101257e-05,
2279
+ "loss": 0.271,
2280
+ "step": 3710
2281
+ },
2282
+ {
2283
+ "epoch": 5.69,
2284
+ "learning_rate": 2.3955147808358818e-05,
2285
+ "loss": 0.1965,
2286
+ "step": 3720
2287
+ },
2288
+ {
2289
+ "epoch": 5.7,
2290
+ "learning_rate": 2.3870200475705064e-05,
2291
+ "loss": 0.2836,
2292
+ "step": 3730
2293
+ },
2294
+ {
2295
+ "epoch": 5.72,
2296
+ "learning_rate": 2.3785253143051307e-05,
2297
+ "loss": 0.2765,
2298
+ "step": 3740
2299
+ },
2300
+ {
2301
+ "epoch": 5.73,
2302
+ "learning_rate": 2.3700305810397554e-05,
2303
+ "loss": 0.267,
2304
+ "step": 3750
2305
+ },
2306
+ {
2307
+ "epoch": 5.75,
2308
+ "learning_rate": 2.36153584777438e-05,
2309
+ "loss": 0.2518,
2310
+ "step": 3760
2311
+ },
2312
+ {
2313
+ "epoch": 5.76,
2314
+ "learning_rate": 2.3530411145090047e-05,
2315
+ "loss": 0.3005,
2316
+ "step": 3770
2317
+ },
2318
+ {
2319
+ "epoch": 5.78,
2320
+ "learning_rate": 2.3445463812436293e-05,
2321
+ "loss": 0.2205,
2322
+ "step": 3780
2323
+ },
2324
+ {
2325
+ "epoch": 5.79,
2326
+ "learning_rate": 2.3360516479782536e-05,
2327
+ "loss": 0.2288,
2328
+ "step": 3790
2329
+ },
2330
+ {
2331
+ "epoch": 5.81,
2332
+ "learning_rate": 2.3275569147128782e-05,
2333
+ "loss": 0.2786,
2334
+ "step": 3800
2335
+ },
2336
+ {
2337
+ "epoch": 5.83,
2338
+ "learning_rate": 2.319062181447503e-05,
2339
+ "loss": 0.2371,
2340
+ "step": 3810
2341
+ },
2342
+ {
2343
+ "epoch": 5.84,
2344
+ "learning_rate": 2.3105674481821272e-05,
2345
+ "loss": 0.2495,
2346
+ "step": 3820
2347
+ },
2348
+ {
2349
+ "epoch": 5.86,
2350
+ "learning_rate": 2.302072714916752e-05,
2351
+ "loss": 0.3372,
2352
+ "step": 3830
2353
+ },
2354
+ {
2355
+ "epoch": 5.87,
2356
+ "learning_rate": 2.2935779816513765e-05,
2357
+ "loss": 0.286,
2358
+ "step": 3840
2359
+ },
2360
+ {
2361
+ "epoch": 5.89,
2362
+ "learning_rate": 2.2850832483860008e-05,
2363
+ "loss": 0.1617,
2364
+ "step": 3850
2365
+ },
2366
+ {
2367
+ "epoch": 5.9,
2368
+ "learning_rate": 2.2765885151206254e-05,
2369
+ "loss": 0.3336,
2370
+ "step": 3860
2371
+ },
2372
+ {
2373
+ "epoch": 5.92,
2374
+ "learning_rate": 2.2680937818552497e-05,
2375
+ "loss": 0.2747,
2376
+ "step": 3870
2377
+ },
2378
+ {
2379
+ "epoch": 5.93,
2380
+ "learning_rate": 2.2595990485898744e-05,
2381
+ "loss": 0.2783,
2382
+ "step": 3880
2383
+ },
2384
+ {
2385
+ "epoch": 5.95,
2386
+ "learning_rate": 2.251104315324499e-05,
2387
+ "loss": 0.2528,
2388
+ "step": 3890
2389
+ },
2390
+ {
2391
+ "epoch": 5.96,
2392
+ "learning_rate": 2.2426095820591233e-05,
2393
+ "loss": 0.2145,
2394
+ "step": 3900
2395
+ },
2396
+ {
2397
+ "epoch": 5.98,
2398
+ "learning_rate": 2.234114848793748e-05,
2399
+ "loss": 0.2747,
2400
+ "step": 3910
2401
+ },
2402
+ {
2403
+ "epoch": 5.99,
2404
+ "learning_rate": 2.2256201155283726e-05,
2405
+ "loss": 0.276,
2406
+ "step": 3920
2407
+ },
2408
+ {
2409
+ "epoch": 6.0,
2410
+ "eval_accuracy": 0.7335243553008596,
2411
+ "eval_loss": 0.8875630497932434,
2412
+ "eval_runtime": 52.9673,
2413
+ "eval_samples_per_second": 98.835,
2414
+ "eval_steps_per_second": 12.366,
2415
+ "step": 3924
2416
+ },
2417
+ {
2418
+ "epoch": 6.01,
2419
+ "learning_rate": 2.217125382262997e-05,
2420
+ "loss": 0.2266,
2421
+ "step": 3930
2422
+ },
2423
+ {
2424
+ "epoch": 6.02,
2425
+ "learning_rate": 2.2086306489976216e-05,
2426
+ "loss": 0.1317,
2427
+ "step": 3940
2428
+ },
2429
+ {
2430
+ "epoch": 6.04,
2431
+ "learning_rate": 2.200135915732246e-05,
2432
+ "loss": 0.1782,
2433
+ "step": 3950
2434
+ },
2435
+ {
2436
+ "epoch": 6.05,
2437
+ "learning_rate": 2.1916411824668705e-05,
2438
+ "loss": 0.1682,
2439
+ "step": 3960
2440
+ },
2441
+ {
2442
+ "epoch": 6.07,
2443
+ "learning_rate": 2.183146449201495e-05,
2444
+ "loss": 0.136,
2445
+ "step": 3970
2446
+ },
2447
+ {
2448
+ "epoch": 6.09,
2449
+ "learning_rate": 2.1746517159361194e-05,
2450
+ "loss": 0.1868,
2451
+ "step": 3980
2452
+ },
2453
+ {
2454
+ "epoch": 6.1,
2455
+ "learning_rate": 2.166156982670744e-05,
2456
+ "loss": 0.2079,
2457
+ "step": 3990
2458
+ },
2459
+ {
2460
+ "epoch": 6.12,
2461
+ "learning_rate": 2.1576622494053687e-05,
2462
+ "loss": 0.1697,
2463
+ "step": 4000
2464
+ },
2465
+ {
2466
+ "epoch": 6.13,
2467
+ "learning_rate": 2.1491675161399934e-05,
2468
+ "loss": 0.1892,
2469
+ "step": 4010
2470
+ },
2471
+ {
2472
+ "epoch": 6.15,
2473
+ "learning_rate": 2.140672782874618e-05,
2474
+ "loss": 0.2033,
2475
+ "step": 4020
2476
+ },
2477
+ {
2478
+ "epoch": 6.16,
2479
+ "learning_rate": 2.1321780496092427e-05,
2480
+ "loss": 0.1372,
2481
+ "step": 4030
2482
+ },
2483
+ {
2484
+ "epoch": 6.18,
2485
+ "learning_rate": 2.123683316343867e-05,
2486
+ "loss": 0.199,
2487
+ "step": 4040
2488
+ },
2489
+ {
2490
+ "epoch": 6.19,
2491
+ "learning_rate": 2.1151885830784916e-05,
2492
+ "loss": 0.1655,
2493
+ "step": 4050
2494
+ },
2495
+ {
2496
+ "epoch": 6.21,
2497
+ "learning_rate": 2.106693849813116e-05,
2498
+ "loss": 0.1681,
2499
+ "step": 4060
2500
+ },
2501
+ {
2502
+ "epoch": 6.22,
2503
+ "learning_rate": 2.0981991165477406e-05,
2504
+ "loss": 0.2172,
2505
+ "step": 4070
2506
+ },
2507
+ {
2508
+ "epoch": 6.24,
2509
+ "learning_rate": 2.0897043832823652e-05,
2510
+ "loss": 0.1542,
2511
+ "step": 4080
2512
+ },
2513
+ {
2514
+ "epoch": 6.25,
2515
+ "learning_rate": 2.0812096500169895e-05,
2516
+ "loss": 0.1596,
2517
+ "step": 4090
2518
+ },
2519
+ {
2520
+ "epoch": 6.27,
2521
+ "learning_rate": 2.072714916751614e-05,
2522
+ "loss": 0.1871,
2523
+ "step": 4100
2524
+ },
2525
+ {
2526
+ "epoch": 6.28,
2527
+ "learning_rate": 2.0642201834862388e-05,
2528
+ "loss": 0.1852,
2529
+ "step": 4110
2530
+ },
2531
+ {
2532
+ "epoch": 6.3,
2533
+ "learning_rate": 2.055725450220863e-05,
2534
+ "loss": 0.2118,
2535
+ "step": 4120
2536
+ },
2537
+ {
2538
+ "epoch": 6.31,
2539
+ "learning_rate": 2.0472307169554877e-05,
2540
+ "loss": 0.1648,
2541
+ "step": 4130
2542
+ },
2543
+ {
2544
+ "epoch": 6.33,
2545
+ "learning_rate": 2.038735983690112e-05,
2546
+ "loss": 0.1759,
2547
+ "step": 4140
2548
+ },
2549
+ {
2550
+ "epoch": 6.35,
2551
+ "learning_rate": 2.0302412504247367e-05,
2552
+ "loss": 0.1385,
2553
+ "step": 4150
2554
+ },
2555
+ {
2556
+ "epoch": 6.36,
2557
+ "learning_rate": 2.0217465171593613e-05,
2558
+ "loss": 0.1848,
2559
+ "step": 4160
2560
+ },
2561
+ {
2562
+ "epoch": 6.38,
2563
+ "learning_rate": 2.0132517838939856e-05,
2564
+ "loss": 0.2324,
2565
+ "step": 4170
2566
+ },
2567
+ {
2568
+ "epoch": 6.39,
2569
+ "learning_rate": 2.0047570506286103e-05,
2570
+ "loss": 0.2223,
2571
+ "step": 4180
2572
+ },
2573
+ {
2574
+ "epoch": 6.41,
2575
+ "learning_rate": 1.996262317363235e-05,
2576
+ "loss": 0.1592,
2577
+ "step": 4190
2578
+ },
2579
+ {
2580
+ "epoch": 6.42,
2581
+ "learning_rate": 1.9877675840978592e-05,
2582
+ "loss": 0.1849,
2583
+ "step": 4200
2584
+ },
2585
+ {
2586
+ "epoch": 6.44,
2587
+ "learning_rate": 1.979272850832484e-05,
2588
+ "loss": 0.1592,
2589
+ "step": 4210
2590
+ },
2591
+ {
2592
+ "epoch": 6.45,
2593
+ "learning_rate": 1.9707781175671085e-05,
2594
+ "loss": 0.147,
2595
+ "step": 4220
2596
+ },
2597
+ {
2598
+ "epoch": 6.47,
2599
+ "learning_rate": 1.9622833843017328e-05,
2600
+ "loss": 0.2434,
2601
+ "step": 4230
2602
+ },
2603
+ {
2604
+ "epoch": 6.48,
2605
+ "learning_rate": 1.9537886510363575e-05,
2606
+ "loss": 0.1759,
2607
+ "step": 4240
2608
+ },
2609
+ {
2610
+ "epoch": 6.5,
2611
+ "learning_rate": 1.945293917770982e-05,
2612
+ "loss": 0.1086,
2613
+ "step": 4250
2614
+ },
2615
+ {
2616
+ "epoch": 6.51,
2617
+ "learning_rate": 1.9367991845056068e-05,
2618
+ "loss": 0.1473,
2619
+ "step": 4260
2620
+ },
2621
+ {
2622
+ "epoch": 6.53,
2623
+ "learning_rate": 1.9283044512402314e-05,
2624
+ "loss": 0.2267,
2625
+ "step": 4270
2626
+ },
2627
+ {
2628
+ "epoch": 6.54,
2629
+ "learning_rate": 1.9198097179748557e-05,
2630
+ "loss": 0.1617,
2631
+ "step": 4280
2632
+ },
2633
+ {
2634
+ "epoch": 6.56,
2635
+ "learning_rate": 1.9113149847094803e-05,
2636
+ "loss": 0.1352,
2637
+ "step": 4290
2638
+ },
2639
+ {
2640
+ "epoch": 6.57,
2641
+ "learning_rate": 1.902820251444105e-05,
2642
+ "loss": 0.1824,
2643
+ "step": 4300
2644
+ },
2645
+ {
2646
+ "epoch": 6.59,
2647
+ "learning_rate": 1.8943255181787293e-05,
2648
+ "loss": 0.2317,
2649
+ "step": 4310
2650
+ },
2651
+ {
2652
+ "epoch": 6.6,
2653
+ "learning_rate": 1.885830784913354e-05,
2654
+ "loss": 0.1288,
2655
+ "step": 4320
2656
+ },
2657
+ {
2658
+ "epoch": 6.62,
2659
+ "learning_rate": 1.8773360516479782e-05,
2660
+ "loss": 0.1705,
2661
+ "step": 4330
2662
+ },
2663
+ {
2664
+ "epoch": 6.64,
2665
+ "learning_rate": 1.868841318382603e-05,
2666
+ "loss": 0.1666,
2667
+ "step": 4340
2668
+ },
2669
+ {
2670
+ "epoch": 6.65,
2671
+ "learning_rate": 1.8603465851172275e-05,
2672
+ "loss": 0.1805,
2673
+ "step": 4350
2674
+ },
2675
+ {
2676
+ "epoch": 6.67,
2677
+ "learning_rate": 1.8518518518518518e-05,
2678
+ "loss": 0.1735,
2679
+ "step": 4360
2680
+ },
2681
+ {
2682
+ "epoch": 6.68,
2683
+ "learning_rate": 1.8433571185864765e-05,
2684
+ "loss": 0.1469,
2685
+ "step": 4370
2686
+ },
2687
+ {
2688
+ "epoch": 6.7,
2689
+ "learning_rate": 1.834862385321101e-05,
2690
+ "loss": 0.1741,
2691
+ "step": 4380
2692
+ },
2693
+ {
2694
+ "epoch": 6.71,
2695
+ "learning_rate": 1.8263676520557254e-05,
2696
+ "loss": 0.1928,
2697
+ "step": 4390
2698
+ },
2699
+ {
2700
+ "epoch": 6.73,
2701
+ "learning_rate": 1.81787291879035e-05,
2702
+ "loss": 0.1633,
2703
+ "step": 4400
2704
+ },
2705
+ {
2706
+ "epoch": 6.74,
2707
+ "learning_rate": 1.8093781855249747e-05,
2708
+ "loss": 0.171,
2709
+ "step": 4410
2710
+ },
2711
+ {
2712
+ "epoch": 6.76,
2713
+ "learning_rate": 1.800883452259599e-05,
2714
+ "loss": 0.1172,
2715
+ "step": 4420
2716
+ },
2717
+ {
2718
+ "epoch": 6.77,
2719
+ "learning_rate": 1.7923887189942237e-05,
2720
+ "loss": 0.1969,
2721
+ "step": 4430
2722
+ },
2723
+ {
2724
+ "epoch": 6.79,
2725
+ "learning_rate": 1.783893985728848e-05,
2726
+ "loss": 0.1892,
2727
+ "step": 4440
2728
+ },
2729
+ {
2730
+ "epoch": 6.8,
2731
+ "learning_rate": 1.7753992524634726e-05,
2732
+ "loss": 0.1663,
2733
+ "step": 4450
2734
+ },
2735
+ {
2736
+ "epoch": 6.82,
2737
+ "learning_rate": 1.7669045191980972e-05,
2738
+ "loss": 0.1804,
2739
+ "step": 4460
2740
+ },
2741
+ {
2742
+ "epoch": 6.83,
2743
+ "learning_rate": 1.7584097859327215e-05,
2744
+ "loss": 0.1847,
2745
+ "step": 4470
2746
+ },
2747
+ {
2748
+ "epoch": 6.85,
2749
+ "learning_rate": 1.7499150526673462e-05,
2750
+ "loss": 0.1542,
2751
+ "step": 4480
2752
+ },
2753
+ {
2754
+ "epoch": 6.86,
2755
+ "learning_rate": 1.741420319401971e-05,
2756
+ "loss": 0.2272,
2757
+ "step": 4490
2758
+ },
2759
+ {
2760
+ "epoch": 6.88,
2761
+ "learning_rate": 1.7329255861365955e-05,
2762
+ "loss": 0.2078,
2763
+ "step": 4500
2764
+ },
2765
+ {
2766
+ "epoch": 6.9,
2767
+ "learning_rate": 1.72443085287122e-05,
2768
+ "loss": 0.1508,
2769
+ "step": 4510
2770
+ },
2771
+ {
2772
+ "epoch": 6.91,
2773
+ "learning_rate": 1.7159361196058444e-05,
2774
+ "loss": 0.1619,
2775
+ "step": 4520
2776
+ },
2777
+ {
2778
+ "epoch": 6.93,
2779
+ "learning_rate": 1.707441386340469e-05,
2780
+ "loss": 0.147,
2781
+ "step": 4530
2782
+ },
2783
+ {
2784
+ "epoch": 6.94,
2785
+ "learning_rate": 1.6989466530750937e-05,
2786
+ "loss": 0.1696,
2787
+ "step": 4540
2788
+ },
2789
+ {
2790
+ "epoch": 6.96,
2791
+ "learning_rate": 1.690451919809718e-05,
2792
+ "loss": 0.1835,
2793
+ "step": 4550
2794
+ },
2795
+ {
2796
+ "epoch": 6.97,
2797
+ "learning_rate": 1.6819571865443427e-05,
2798
+ "loss": 0.2407,
2799
+ "step": 4560
2800
+ },
2801
+ {
2802
+ "epoch": 6.99,
2803
+ "learning_rate": 1.6734624532789673e-05,
2804
+ "loss": 0.2217,
2805
+ "step": 4570
2806
+ },
2807
+ {
2808
+ "epoch": 7.0,
2809
+ "eval_accuracy": 0.7255014326647564,
2810
+ "eval_loss": 0.975210428237915,
2811
+ "eval_runtime": 48.2463,
2812
+ "eval_samples_per_second": 108.506,
2813
+ "eval_steps_per_second": 13.576,
2814
+ "step": 4578
2815
+ },
2816
+ {
2817
+ "epoch": 7.0,
2818
+ "learning_rate": 1.6649677200135916e-05,
2819
+ "loss": 0.1381,
2820
+ "step": 4580
2821
+ },
2822
+ {
2823
+ "epoch": 7.02,
2824
+ "learning_rate": 1.6564729867482163e-05,
2825
+ "loss": 0.1104,
2826
+ "step": 4590
2827
+ },
2828
+ {
2829
+ "epoch": 7.03,
2830
+ "learning_rate": 1.647978253482841e-05,
2831
+ "loss": 0.1027,
2832
+ "step": 4600
2833
+ },
2834
+ {
2835
+ "epoch": 7.05,
2836
+ "learning_rate": 1.6394835202174652e-05,
2837
+ "loss": 0.0925,
2838
+ "step": 4610
2839
+ },
2840
+ {
2841
+ "epoch": 7.06,
2842
+ "learning_rate": 1.63098878695209e-05,
2843
+ "loss": 0.0849,
2844
+ "step": 4620
2845
+ },
2846
+ {
2847
+ "epoch": 7.08,
2848
+ "learning_rate": 1.622494053686714e-05,
2849
+ "loss": 0.1063,
2850
+ "step": 4630
2851
+ },
2852
+ {
2853
+ "epoch": 7.09,
2854
+ "learning_rate": 1.6139993204213388e-05,
2855
+ "loss": 0.1039,
2856
+ "step": 4640
2857
+ },
2858
+ {
2859
+ "epoch": 7.11,
2860
+ "learning_rate": 1.6055045871559634e-05,
2861
+ "loss": 0.0816,
2862
+ "step": 4650
2863
+ },
2864
+ {
2865
+ "epoch": 7.12,
2866
+ "learning_rate": 1.5970098538905877e-05,
2867
+ "loss": 0.1369,
2868
+ "step": 4660
2869
+ },
2870
+ {
2871
+ "epoch": 7.14,
2872
+ "learning_rate": 1.5885151206252124e-05,
2873
+ "loss": 0.072,
2874
+ "step": 4670
2875
+ },
2876
+ {
2877
+ "epoch": 7.16,
2878
+ "learning_rate": 1.580020387359837e-05,
2879
+ "loss": 0.1572,
2880
+ "step": 4680
2881
+ },
2882
+ {
2883
+ "epoch": 7.17,
2884
+ "learning_rate": 1.5715256540944613e-05,
2885
+ "loss": 0.1364,
2886
+ "step": 4690
2887
+ },
2888
+ {
2889
+ "epoch": 7.19,
2890
+ "learning_rate": 1.563030920829086e-05,
2891
+ "loss": 0.1231,
2892
+ "step": 4700
2893
+ },
2894
+ {
2895
+ "epoch": 7.2,
2896
+ "learning_rate": 1.5545361875637103e-05,
2897
+ "loss": 0.1029,
2898
+ "step": 4710
2899
+ },
2900
+ {
2901
+ "epoch": 7.22,
2902
+ "learning_rate": 1.546041454298335e-05,
2903
+ "loss": 0.1086,
2904
+ "step": 4720
2905
+ },
2906
+ {
2907
+ "epoch": 7.23,
2908
+ "learning_rate": 1.5375467210329596e-05,
2909
+ "loss": 0.1288,
2910
+ "step": 4730
2911
+ },
2912
+ {
2913
+ "epoch": 7.25,
2914
+ "learning_rate": 1.5290519877675842e-05,
2915
+ "loss": 0.0978,
2916
+ "step": 4740
2917
+ },
2918
+ {
2919
+ "epoch": 7.26,
2920
+ "learning_rate": 1.5205572545022087e-05,
2921
+ "loss": 0.132,
2922
+ "step": 4750
2923
+ },
2924
+ {
2925
+ "epoch": 7.28,
2926
+ "learning_rate": 1.5120625212368333e-05,
2927
+ "loss": 0.1073,
2928
+ "step": 4760
2929
+ },
2930
+ {
2931
+ "epoch": 7.29,
2932
+ "learning_rate": 1.5035677879714576e-05,
2933
+ "loss": 0.0635,
2934
+ "step": 4770
2935
+ },
2936
+ {
2937
+ "epoch": 7.31,
2938
+ "learning_rate": 1.4950730547060823e-05,
2939
+ "loss": 0.1048,
2940
+ "step": 4780
2941
+ },
2942
+ {
2943
+ "epoch": 7.32,
2944
+ "learning_rate": 1.4865783214407069e-05,
2945
+ "loss": 0.0925,
2946
+ "step": 4790
2947
+ },
2948
+ {
2949
+ "epoch": 7.34,
2950
+ "learning_rate": 1.4780835881753314e-05,
2951
+ "loss": 0.0982,
2952
+ "step": 4800
2953
+ },
2954
+ {
2955
+ "epoch": 7.35,
2956
+ "learning_rate": 1.469588854909956e-05,
2957
+ "loss": 0.1385,
2958
+ "step": 4810
2959
+ },
2960
+ {
2961
+ "epoch": 7.37,
2962
+ "learning_rate": 1.4610941216445803e-05,
2963
+ "loss": 0.112,
2964
+ "step": 4820
2965
+ },
2966
+ {
2967
+ "epoch": 7.38,
2968
+ "learning_rate": 1.452599388379205e-05,
2969
+ "loss": 0.1666,
2970
+ "step": 4830
2971
+ },
2972
+ {
2973
+ "epoch": 7.4,
2974
+ "learning_rate": 1.4441046551138296e-05,
2975
+ "loss": 0.1001,
2976
+ "step": 4840
2977
+ },
2978
+ {
2979
+ "epoch": 7.41,
2980
+ "learning_rate": 1.435609921848454e-05,
2981
+ "loss": 0.1285,
2982
+ "step": 4850
2983
+ },
2984
+ {
2985
+ "epoch": 7.43,
2986
+ "learning_rate": 1.4271151885830786e-05,
2987
+ "loss": 0.1185,
2988
+ "step": 4860
2989
+ },
2990
+ {
2991
+ "epoch": 7.45,
2992
+ "learning_rate": 1.4186204553177032e-05,
2993
+ "loss": 0.0936,
2994
+ "step": 4870
2995
+ },
2996
+ {
2997
+ "epoch": 7.46,
2998
+ "learning_rate": 1.4101257220523275e-05,
2999
+ "loss": 0.0754,
3000
+ "step": 4880
3001
+ },
3002
+ {
3003
+ "epoch": 7.48,
3004
+ "learning_rate": 1.4016309887869522e-05,
3005
+ "loss": 0.0842,
3006
+ "step": 4890
3007
+ },
3008
+ {
3009
+ "epoch": 7.49,
3010
+ "learning_rate": 1.3931362555215766e-05,
3011
+ "loss": 0.1387,
3012
+ "step": 4900
3013
+ },
3014
+ {
3015
+ "epoch": 7.51,
3016
+ "learning_rate": 1.3846415222562013e-05,
3017
+ "loss": 0.1285,
3018
+ "step": 4910
3019
+ },
3020
+ {
3021
+ "epoch": 7.52,
3022
+ "learning_rate": 1.3761467889908258e-05,
3023
+ "loss": 0.1909,
3024
+ "step": 4920
3025
+ },
3026
+ {
3027
+ "epoch": 7.54,
3028
+ "learning_rate": 1.3676520557254502e-05,
3029
+ "loss": 0.0862,
3030
+ "step": 4930
3031
+ },
3032
+ {
3033
+ "epoch": 7.55,
3034
+ "learning_rate": 1.3591573224600749e-05,
3035
+ "loss": 0.0951,
3036
+ "step": 4940
3037
+ },
3038
+ {
3039
+ "epoch": 7.57,
3040
+ "learning_rate": 1.3506625891946995e-05,
3041
+ "loss": 0.0812,
3042
+ "step": 4950
3043
+ },
3044
+ {
3045
+ "epoch": 7.58,
3046
+ "learning_rate": 1.3421678559293238e-05,
3047
+ "loss": 0.1107,
3048
+ "step": 4960
3049
+ },
3050
+ {
3051
+ "epoch": 7.6,
3052
+ "learning_rate": 1.3336731226639485e-05,
3053
+ "loss": 0.1025,
3054
+ "step": 4970
3055
+ },
3056
+ {
3057
+ "epoch": 7.61,
3058
+ "learning_rate": 1.3251783893985731e-05,
3059
+ "loss": 0.1235,
3060
+ "step": 4980
3061
+ },
3062
+ {
3063
+ "epoch": 7.63,
3064
+ "learning_rate": 1.3166836561331974e-05,
3065
+ "loss": 0.1164,
3066
+ "step": 4990
3067
+ },
3068
+ {
3069
+ "epoch": 7.64,
3070
+ "learning_rate": 1.308188922867822e-05,
3071
+ "loss": 0.1105,
3072
+ "step": 5000
3073
+ },
3074
+ {
3075
+ "epoch": 7.66,
3076
+ "learning_rate": 1.2996941896024464e-05,
3077
+ "loss": 0.0727,
3078
+ "step": 5010
3079
+ },
3080
+ {
3081
+ "epoch": 7.67,
3082
+ "learning_rate": 1.291199456337071e-05,
3083
+ "loss": 0.1,
3084
+ "step": 5020
3085
+ },
3086
+ {
3087
+ "epoch": 7.69,
3088
+ "learning_rate": 1.2827047230716956e-05,
3089
+ "loss": 0.0591,
3090
+ "step": 5030
3091
+ },
3092
+ {
3093
+ "epoch": 7.71,
3094
+ "learning_rate": 1.2742099898063201e-05,
3095
+ "loss": 0.1469,
3096
+ "step": 5040
3097
+ },
3098
+ {
3099
+ "epoch": 7.72,
3100
+ "learning_rate": 1.2657152565409448e-05,
3101
+ "loss": 0.094,
3102
+ "step": 5050
3103
+ },
3104
+ {
3105
+ "epoch": 7.74,
3106
+ "learning_rate": 1.2572205232755694e-05,
3107
+ "loss": 0.1413,
3108
+ "step": 5060
3109
+ },
3110
+ {
3111
+ "epoch": 7.75,
3112
+ "learning_rate": 1.2487257900101937e-05,
3113
+ "loss": 0.1544,
3114
+ "step": 5070
3115
+ },
3116
+ {
3117
+ "epoch": 7.77,
3118
+ "learning_rate": 1.2402310567448183e-05,
3119
+ "loss": 0.0947,
3120
+ "step": 5080
3121
+ },
3122
+ {
3123
+ "epoch": 7.78,
3124
+ "learning_rate": 1.2317363234794428e-05,
3125
+ "loss": 0.0794,
3126
+ "step": 5090
3127
+ },
3128
+ {
3129
+ "epoch": 7.8,
3130
+ "learning_rate": 1.2232415902140673e-05,
3131
+ "loss": 0.0734,
3132
+ "step": 5100
3133
+ },
3134
+ {
3135
+ "epoch": 7.81,
3136
+ "learning_rate": 1.2147468569486918e-05,
3137
+ "loss": 0.1328,
3138
+ "step": 5110
3139
+ },
3140
+ {
3141
+ "epoch": 7.83,
3142
+ "learning_rate": 1.2062521236833164e-05,
3143
+ "loss": 0.1425,
3144
+ "step": 5120
3145
+ },
3146
+ {
3147
+ "epoch": 7.84,
3148
+ "learning_rate": 1.1977573904179409e-05,
3149
+ "loss": 0.1404,
3150
+ "step": 5130
3151
+ },
3152
+ {
3153
+ "epoch": 7.86,
3154
+ "learning_rate": 1.1892626571525654e-05,
3155
+ "loss": 0.1005,
3156
+ "step": 5140
3157
+ },
3158
+ {
3159
+ "epoch": 7.87,
3160
+ "learning_rate": 1.18076792388719e-05,
3161
+ "loss": 0.1057,
3162
+ "step": 5150
3163
+ },
3164
+ {
3165
+ "epoch": 7.89,
3166
+ "learning_rate": 1.1722731906218146e-05,
3167
+ "loss": 0.089,
3168
+ "step": 5160
3169
+ },
3170
+ {
3171
+ "epoch": 7.9,
3172
+ "learning_rate": 1.1637784573564391e-05,
3173
+ "loss": 0.1242,
3174
+ "step": 5170
3175
+ },
3176
+ {
3177
+ "epoch": 7.92,
3178
+ "learning_rate": 1.1552837240910636e-05,
3179
+ "loss": 0.114,
3180
+ "step": 5180
3181
+ },
3182
+ {
3183
+ "epoch": 7.93,
3184
+ "learning_rate": 1.1467889908256882e-05,
3185
+ "loss": 0.0863,
3186
+ "step": 5190
3187
+ },
3188
+ {
3189
+ "epoch": 7.95,
3190
+ "learning_rate": 1.1382942575603127e-05,
3191
+ "loss": 0.1092,
3192
+ "step": 5200
3193
+ },
3194
+ {
3195
+ "epoch": 7.97,
3196
+ "learning_rate": 1.1297995242949372e-05,
3197
+ "loss": 0.1331,
3198
+ "step": 5210
3199
+ },
3200
+ {
3201
+ "epoch": 7.98,
3202
+ "learning_rate": 1.1213047910295617e-05,
3203
+ "loss": 0.0932,
3204
+ "step": 5220
3205
+ },
3206
+ {
3207
+ "epoch": 8.0,
3208
+ "learning_rate": 1.1128100577641863e-05,
3209
+ "loss": 0.0646,
3210
+ "step": 5230
3211
+ },
3212
+ {
3213
+ "epoch": 8.0,
3214
+ "eval_accuracy": 0.7262655205348615,
3215
+ "eval_loss": 1.0956766605377197,
3216
+ "eval_runtime": 48.2758,
3217
+ "eval_samples_per_second": 108.439,
3218
+ "eval_steps_per_second": 13.568,
3219
+ "step": 5232
3220
+ },
3221
+ {
3222
+ "epoch": 8.01,
3223
+ "learning_rate": 1.1043153244988108e-05,
3224
+ "loss": 0.0684,
3225
+ "step": 5240
3226
+ },
3227
+ {
3228
+ "epoch": 8.03,
3229
+ "learning_rate": 1.0958205912334353e-05,
3230
+ "loss": 0.0661,
3231
+ "step": 5250
3232
+ },
3233
+ {
3234
+ "epoch": 8.04,
3235
+ "learning_rate": 1.0873258579680597e-05,
3236
+ "loss": 0.1048,
3237
+ "step": 5260
3238
+ },
3239
+ {
3240
+ "epoch": 8.06,
3241
+ "learning_rate": 1.0788311247026844e-05,
3242
+ "loss": 0.0763,
3243
+ "step": 5270
3244
+ },
3245
+ {
3246
+ "epoch": 8.07,
3247
+ "learning_rate": 1.070336391437309e-05,
3248
+ "loss": 0.0477,
3249
+ "step": 5280
3250
+ },
3251
+ {
3252
+ "epoch": 8.09,
3253
+ "learning_rate": 1.0618416581719335e-05,
3254
+ "loss": 0.0662,
3255
+ "step": 5290
3256
+ },
3257
+ {
3258
+ "epoch": 8.1,
3259
+ "learning_rate": 1.053346924906558e-05,
3260
+ "loss": 0.0479,
3261
+ "step": 5300
3262
+ },
3263
+ {
3264
+ "epoch": 8.12,
3265
+ "learning_rate": 1.0448521916411826e-05,
3266
+ "loss": 0.065,
3267
+ "step": 5310
3268
+ },
3269
+ {
3270
+ "epoch": 8.13,
3271
+ "learning_rate": 1.036357458375807e-05,
3272
+ "loss": 0.0685,
3273
+ "step": 5320
3274
+ },
3275
+ {
3276
+ "epoch": 8.15,
3277
+ "learning_rate": 1.0278627251104316e-05,
3278
+ "loss": 0.0763,
3279
+ "step": 5330
3280
+ },
3281
+ {
3282
+ "epoch": 8.16,
3283
+ "learning_rate": 1.019367991845056e-05,
3284
+ "loss": 0.061,
3285
+ "step": 5340
3286
+ },
3287
+ {
3288
+ "epoch": 8.18,
3289
+ "learning_rate": 1.0108732585796807e-05,
3290
+ "loss": 0.0848,
3291
+ "step": 5350
3292
+ },
3293
+ {
3294
+ "epoch": 8.19,
3295
+ "learning_rate": 1.0023785253143051e-05,
3296
+ "loss": 0.0621,
3297
+ "step": 5360
3298
+ },
3299
+ {
3300
+ "epoch": 8.21,
3301
+ "learning_rate": 9.938837920489296e-06,
3302
+ "loss": 0.0505,
3303
+ "step": 5370
3304
+ },
3305
+ {
3306
+ "epoch": 8.23,
3307
+ "learning_rate": 9.853890587835543e-06,
3308
+ "loss": 0.0823,
3309
+ "step": 5380
3310
+ },
3311
+ {
3312
+ "epoch": 8.24,
3313
+ "learning_rate": 9.768943255181787e-06,
3314
+ "loss": 0.0637,
3315
+ "step": 5390
3316
+ },
3317
+ {
3318
+ "epoch": 8.26,
3319
+ "learning_rate": 9.683995922528034e-06,
3320
+ "loss": 0.05,
3321
+ "step": 5400
3322
+ },
3323
+ {
3324
+ "epoch": 8.27,
3325
+ "learning_rate": 9.599048589874278e-06,
3326
+ "loss": 0.0393,
3327
+ "step": 5410
3328
+ },
3329
+ {
3330
+ "epoch": 8.29,
3331
+ "learning_rate": 9.514101257220525e-06,
3332
+ "loss": 0.0983,
3333
+ "step": 5420
3334
+ },
3335
+ {
3336
+ "epoch": 8.3,
3337
+ "learning_rate": 9.42915392456677e-06,
3338
+ "loss": 0.0467,
3339
+ "step": 5430
3340
+ },
3341
+ {
3342
+ "epoch": 8.32,
3343
+ "learning_rate": 9.344206591913014e-06,
3344
+ "loss": 0.0898,
3345
+ "step": 5440
3346
+ },
3347
+ {
3348
+ "epoch": 8.33,
3349
+ "learning_rate": 9.259259259259259e-06,
3350
+ "loss": 0.0757,
3351
+ "step": 5450
3352
+ },
3353
+ {
3354
+ "epoch": 8.35,
3355
+ "learning_rate": 9.174311926605506e-06,
3356
+ "loss": 0.0849,
3357
+ "step": 5460
3358
+ },
3359
+ {
3360
+ "epoch": 8.36,
3361
+ "learning_rate": 9.08936459395175e-06,
3362
+ "loss": 0.0653,
3363
+ "step": 5470
3364
+ },
3365
+ {
3366
+ "epoch": 8.38,
3367
+ "learning_rate": 9.004417261297995e-06,
3368
+ "loss": 0.0816,
3369
+ "step": 5480
3370
+ },
3371
+ {
3372
+ "epoch": 8.39,
3373
+ "learning_rate": 8.91946992864424e-06,
3374
+ "loss": 0.0846,
3375
+ "step": 5490
3376
+ },
3377
+ {
3378
+ "epoch": 8.41,
3379
+ "learning_rate": 8.834522595990486e-06,
3380
+ "loss": 0.0766,
3381
+ "step": 5500
3382
+ },
3383
+ {
3384
+ "epoch": 8.42,
3385
+ "learning_rate": 8.749575263336731e-06,
3386
+ "loss": 0.0697,
3387
+ "step": 5510
3388
+ },
3389
+ {
3390
+ "epoch": 8.44,
3391
+ "learning_rate": 8.664627930682977e-06,
3392
+ "loss": 0.0611,
3393
+ "step": 5520
3394
+ },
3395
+ {
3396
+ "epoch": 8.45,
3397
+ "learning_rate": 8.579680598029222e-06,
3398
+ "loss": 0.0808,
3399
+ "step": 5530
3400
+ },
3401
+ {
3402
+ "epoch": 8.47,
3403
+ "learning_rate": 8.494733265375469e-06,
3404
+ "loss": 0.0697,
3405
+ "step": 5540
3406
+ },
3407
+ {
3408
+ "epoch": 8.48,
3409
+ "learning_rate": 8.409785932721713e-06,
3410
+ "loss": 0.1034,
3411
+ "step": 5550
3412
+ },
3413
+ {
3414
+ "epoch": 8.5,
3415
+ "learning_rate": 8.324838600067958e-06,
3416
+ "loss": 0.0583,
3417
+ "step": 5560
3418
+ },
3419
+ {
3420
+ "epoch": 8.52,
3421
+ "learning_rate": 8.239891267414204e-06,
3422
+ "loss": 0.0638,
3423
+ "step": 5570
3424
+ },
3425
+ {
3426
+ "epoch": 8.53,
3427
+ "learning_rate": 8.15494393476045e-06,
3428
+ "loss": 0.0319,
3429
+ "step": 5580
3430
+ },
3431
+ {
3432
+ "epoch": 8.55,
3433
+ "learning_rate": 8.069996602106694e-06,
3434
+ "loss": 0.071,
3435
+ "step": 5590
3436
+ },
3437
+ {
3438
+ "epoch": 8.56,
3439
+ "learning_rate": 7.985049269452939e-06,
3440
+ "loss": 0.0929,
3441
+ "step": 5600
3442
+ },
3443
+ {
3444
+ "epoch": 8.58,
3445
+ "learning_rate": 7.900101936799185e-06,
3446
+ "loss": 0.0728,
3447
+ "step": 5610
3448
+ },
3449
+ {
3450
+ "epoch": 8.59,
3451
+ "learning_rate": 7.81515460414543e-06,
3452
+ "loss": 0.0868,
3453
+ "step": 5620
3454
+ },
3455
+ {
3456
+ "epoch": 8.61,
3457
+ "learning_rate": 7.730207271491675e-06,
3458
+ "loss": 0.0725,
3459
+ "step": 5630
3460
+ },
3461
+ {
3462
+ "epoch": 8.62,
3463
+ "learning_rate": 7.645259938837921e-06,
3464
+ "loss": 0.1055,
3465
+ "step": 5640
3466
+ },
3467
+ {
3468
+ "epoch": 8.64,
3469
+ "learning_rate": 7.560312606184167e-06,
3470
+ "loss": 0.0678,
3471
+ "step": 5650
3472
+ },
3473
+ {
3474
+ "epoch": 8.65,
3475
+ "learning_rate": 7.475365273530411e-06,
3476
+ "loss": 0.0914,
3477
+ "step": 5660
3478
+ },
3479
+ {
3480
+ "epoch": 8.67,
3481
+ "learning_rate": 7.390417940876657e-06,
3482
+ "loss": 0.0646,
3483
+ "step": 5670
3484
+ },
3485
+ {
3486
+ "epoch": 8.68,
3487
+ "learning_rate": 7.305470608222902e-06,
3488
+ "loss": 0.0699,
3489
+ "step": 5680
3490
+ },
3491
+ {
3492
+ "epoch": 8.7,
3493
+ "learning_rate": 7.220523275569148e-06,
3494
+ "loss": 0.071,
3495
+ "step": 5690
3496
+ },
3497
+ {
3498
+ "epoch": 8.71,
3499
+ "learning_rate": 7.135575942915393e-06,
3500
+ "loss": 0.1006,
3501
+ "step": 5700
3502
+ },
3503
+ {
3504
+ "epoch": 8.73,
3505
+ "learning_rate": 7.050628610261638e-06,
3506
+ "loss": 0.0402,
3507
+ "step": 5710
3508
+ },
3509
+ {
3510
+ "epoch": 8.74,
3511
+ "learning_rate": 6.965681277607883e-06,
3512
+ "loss": 0.1133,
3513
+ "step": 5720
3514
+ },
3515
+ {
3516
+ "epoch": 8.76,
3517
+ "learning_rate": 6.880733944954129e-06,
3518
+ "loss": 0.0616,
3519
+ "step": 5730
3520
+ },
3521
+ {
3522
+ "epoch": 8.78,
3523
+ "learning_rate": 6.795786612300374e-06,
3524
+ "loss": 0.0836,
3525
+ "step": 5740
3526
+ },
3527
+ {
3528
+ "epoch": 8.79,
3529
+ "learning_rate": 6.710839279646619e-06,
3530
+ "loss": 0.0516,
3531
+ "step": 5750
3532
+ },
3533
+ {
3534
+ "epoch": 8.81,
3535
+ "learning_rate": 6.6258919469928655e-06,
3536
+ "loss": 0.0467,
3537
+ "step": 5760
3538
+ },
3539
+ {
3540
+ "epoch": 8.82,
3541
+ "learning_rate": 6.54094461433911e-06,
3542
+ "loss": 0.1025,
3543
+ "step": 5770
3544
+ },
3545
+ {
3546
+ "epoch": 8.84,
3547
+ "learning_rate": 6.455997281685355e-06,
3548
+ "loss": 0.071,
3549
+ "step": 5780
3550
+ },
3551
+ {
3552
+ "epoch": 8.85,
3553
+ "learning_rate": 6.3710499490316006e-06,
3554
+ "loss": 0.154,
3555
+ "step": 5790
3556
+ },
3557
+ {
3558
+ "epoch": 8.87,
3559
+ "learning_rate": 6.286102616377847e-06,
3560
+ "loss": 0.0633,
3561
+ "step": 5800
3562
+ },
3563
+ {
3564
+ "epoch": 8.88,
3565
+ "learning_rate": 6.201155283724092e-06,
3566
+ "loss": 0.0912,
3567
+ "step": 5810
3568
+ },
3569
+ {
3570
+ "epoch": 8.9,
3571
+ "learning_rate": 6.1162079510703365e-06,
3572
+ "loss": 0.051,
3573
+ "step": 5820
3574
+ },
3575
+ {
3576
+ "epoch": 8.91,
3577
+ "learning_rate": 6.031260618416582e-06,
3578
+ "loss": 0.0584,
3579
+ "step": 5830
3580
+ },
3581
+ {
3582
+ "epoch": 8.93,
3583
+ "learning_rate": 5.946313285762827e-06,
3584
+ "loss": 0.0714,
3585
+ "step": 5840
3586
+ },
3587
+ {
3588
+ "epoch": 8.94,
3589
+ "learning_rate": 5.861365953109073e-06,
3590
+ "loss": 0.0534,
3591
+ "step": 5850
3592
+ },
3593
+ {
3594
+ "epoch": 8.96,
3595
+ "learning_rate": 5.776418620455318e-06,
3596
+ "loss": 0.0411,
3597
+ "step": 5860
3598
+ },
3599
+ {
3600
+ "epoch": 8.97,
3601
+ "learning_rate": 5.6914712878015636e-06,
3602
+ "loss": 0.0461,
3603
+ "step": 5870
3604
+ },
3605
+ {
3606
+ "epoch": 8.99,
3607
+ "learning_rate": 5.606523955147808e-06,
3608
+ "loss": 0.063,
3609
+ "step": 5880
3610
+ },
3611
+ {
3612
+ "epoch": 9.0,
3613
+ "eval_accuracy": 0.7262655205348615,
3614
+ "eval_loss": 1.1334831714630127,
3615
+ "eval_runtime": 48.13,
3616
+ "eval_samples_per_second": 108.768,
3617
+ "eval_steps_per_second": 13.609,
3618
+ "step": 5887
3619
+ },
3620
+ {
3621
+ "epoch": 9.0,
3622
+ "learning_rate": 5.521576622494054e-06,
3623
+ "loss": 0.0773,
3624
+ "step": 5890
3625
+ },
3626
+ {
3627
+ "epoch": 9.02,
3628
+ "learning_rate": 5.436629289840299e-06,
3629
+ "loss": 0.0471,
3630
+ "step": 5900
3631
+ },
3632
+ {
3633
+ "epoch": 9.04,
3634
+ "learning_rate": 5.351681957186545e-06,
3635
+ "loss": 0.0497,
3636
+ "step": 5910
3637
+ },
3638
+ {
3639
+ "epoch": 9.05,
3640
+ "learning_rate": 5.26673462453279e-06,
3641
+ "loss": 0.1033,
3642
+ "step": 5920
3643
+ },
3644
+ {
3645
+ "epoch": 9.07,
3646
+ "learning_rate": 5.181787291879035e-06,
3647
+ "loss": 0.0409,
3648
+ "step": 5930
3649
+ },
3650
+ {
3651
+ "epoch": 9.08,
3652
+ "learning_rate": 5.09683995922528e-06,
3653
+ "loss": 0.0626,
3654
+ "step": 5940
3655
+ },
3656
+ {
3657
+ "epoch": 9.1,
3658
+ "learning_rate": 5.011892626571526e-06,
3659
+ "loss": 0.05,
3660
+ "step": 5950
3661
+ },
3662
+ {
3663
+ "epoch": 9.11,
3664
+ "learning_rate": 4.926945293917771e-06,
3665
+ "loss": 0.0466,
3666
+ "step": 5960
3667
+ },
3668
+ {
3669
+ "epoch": 9.13,
3670
+ "learning_rate": 4.841997961264017e-06,
3671
+ "loss": 0.0698,
3672
+ "step": 5970
3673
+ },
3674
+ {
3675
+ "epoch": 9.14,
3676
+ "learning_rate": 4.7570506286102625e-06,
3677
+ "loss": 0.0583,
3678
+ "step": 5980
3679
+ },
3680
+ {
3681
+ "epoch": 9.16,
3682
+ "learning_rate": 4.672103295956507e-06,
3683
+ "loss": 0.0232,
3684
+ "step": 5990
3685
+ },
3686
+ {
3687
+ "epoch": 9.17,
3688
+ "learning_rate": 4.587155963302753e-06,
3689
+ "loss": 0.0466,
3690
+ "step": 6000
3691
+ },
3692
+ {
3693
+ "epoch": 9.19,
3694
+ "learning_rate": 4.5022086306489975e-06,
3695
+ "loss": 0.0613,
3696
+ "step": 6010
3697
+ },
3698
+ {
3699
+ "epoch": 9.2,
3700
+ "learning_rate": 4.417261297995243e-06,
3701
+ "loss": 0.0691,
3702
+ "step": 6020
3703
+ },
3704
+ {
3705
+ "epoch": 9.22,
3706
+ "learning_rate": 4.332313965341489e-06,
3707
+ "loss": 0.0281,
3708
+ "step": 6030
3709
+ },
3710
+ {
3711
+ "epoch": 9.23,
3712
+ "learning_rate": 4.247366632687734e-06,
3713
+ "loss": 0.0601,
3714
+ "step": 6040
3715
+ },
3716
+ {
3717
+ "epoch": 9.25,
3718
+ "learning_rate": 4.162419300033979e-06,
3719
+ "loss": 0.059,
3720
+ "step": 6050
3721
+ },
3722
+ {
3723
+ "epoch": 9.26,
3724
+ "learning_rate": 4.077471967380225e-06,
3725
+ "loss": 0.0577,
3726
+ "step": 6060
3727
+ },
3728
+ {
3729
+ "epoch": 9.28,
3730
+ "learning_rate": 3.992524634726469e-06,
3731
+ "loss": 0.085,
3732
+ "step": 6070
3733
+ },
3734
+ {
3735
+ "epoch": 9.29,
3736
+ "learning_rate": 3.907577302072715e-06,
3737
+ "loss": 0.0246,
3738
+ "step": 6080
3739
+ },
3740
+ {
3741
+ "epoch": 9.31,
3742
+ "learning_rate": 3.8226299694189605e-06,
3743
+ "loss": 0.0697,
3744
+ "step": 6090
3745
+ },
3746
+ {
3747
+ "epoch": 9.33,
3748
+ "learning_rate": 3.7376826367652057e-06,
3749
+ "loss": 0.0333,
3750
+ "step": 6100
3751
+ },
3752
+ {
3753
+ "epoch": 9.34,
3754
+ "learning_rate": 3.652735304111451e-06,
3755
+ "loss": 0.0165,
3756
+ "step": 6110
3757
+ },
3758
+ {
3759
+ "epoch": 9.36,
3760
+ "learning_rate": 3.5677879714576964e-06,
3761
+ "loss": 0.0401,
3762
+ "step": 6120
3763
+ },
3764
+ {
3765
+ "epoch": 9.37,
3766
+ "learning_rate": 3.4828406388039416e-06,
3767
+ "loss": 0.0526,
3768
+ "step": 6130
3769
+ },
3770
+ {
3771
+ "epoch": 9.39,
3772
+ "learning_rate": 3.397893306150187e-06,
3773
+ "loss": 0.0617,
3774
+ "step": 6140
3775
+ },
3776
+ {
3777
+ "epoch": 9.4,
3778
+ "learning_rate": 3.3129459734964328e-06,
3779
+ "loss": 0.0576,
3780
+ "step": 6150
3781
+ },
3782
+ {
3783
+ "epoch": 9.42,
3784
+ "learning_rate": 3.2279986408426775e-06,
3785
+ "loss": 0.0541,
3786
+ "step": 6160
3787
+ },
3788
+ {
3789
+ "epoch": 9.43,
3790
+ "learning_rate": 3.1430513081889235e-06,
3791
+ "loss": 0.0522,
3792
+ "step": 6170
3793
+ },
3794
+ {
3795
+ "epoch": 9.45,
3796
+ "learning_rate": 3.0581039755351682e-06,
3797
+ "loss": 0.031,
3798
+ "step": 6180
3799
+ },
3800
+ {
3801
+ "epoch": 9.46,
3802
+ "learning_rate": 2.9731566428814134e-06,
3803
+ "loss": 0.0298,
3804
+ "step": 6190
3805
+ },
3806
+ {
3807
+ "epoch": 9.48,
3808
+ "learning_rate": 2.888209310227659e-06,
3809
+ "loss": 0.022,
3810
+ "step": 6200
3811
+ },
3812
+ {
3813
+ "epoch": 9.49,
3814
+ "learning_rate": 2.803261977573904e-06,
3815
+ "loss": 0.0553,
3816
+ "step": 6210
3817
+ },
3818
+ {
3819
+ "epoch": 9.51,
3820
+ "learning_rate": 2.7183146449201493e-06,
3821
+ "loss": 0.0275,
3822
+ "step": 6220
3823
+ },
3824
+ {
3825
+ "epoch": 9.52,
3826
+ "learning_rate": 2.633367312266395e-06,
3827
+ "loss": 0.0227,
3828
+ "step": 6230
3829
+ },
3830
+ {
3831
+ "epoch": 9.54,
3832
+ "learning_rate": 2.54841997961264e-06,
3833
+ "loss": 0.0528,
3834
+ "step": 6240
3835
+ },
3836
+ {
3837
+ "epoch": 9.55,
3838
+ "learning_rate": 2.4634726469588856e-06,
3839
+ "loss": 0.0635,
3840
+ "step": 6250
3841
+ },
3842
+ {
3843
+ "epoch": 9.57,
3844
+ "learning_rate": 2.3785253143051312e-06,
3845
+ "loss": 0.0572,
3846
+ "step": 6260
3847
+ },
3848
+ {
3849
+ "epoch": 9.59,
3850
+ "learning_rate": 2.2935779816513764e-06,
3851
+ "loss": 0.042,
3852
+ "step": 6270
3853
+ },
3854
+ {
3855
+ "epoch": 9.6,
3856
+ "learning_rate": 2.2086306489976216e-06,
3857
+ "loss": 0.0422,
3858
+ "step": 6280
3859
+ },
3860
+ {
3861
+ "epoch": 9.62,
3862
+ "learning_rate": 2.123683316343867e-06,
3863
+ "loss": 0.0311,
3864
+ "step": 6290
3865
+ },
3866
+ {
3867
+ "epoch": 9.63,
3868
+ "learning_rate": 2.0387359836901123e-06,
3869
+ "loss": 0.0375,
3870
+ "step": 6300
3871
+ },
3872
+ {
3873
+ "epoch": 9.65,
3874
+ "learning_rate": 1.9537886510363575e-06,
3875
+ "loss": 0.0491,
3876
+ "step": 6310
3877
+ },
3878
+ {
3879
+ "epoch": 9.66,
3880
+ "learning_rate": 1.8688413183826028e-06,
3881
+ "loss": 0.0537,
3882
+ "step": 6320
3883
+ },
3884
+ {
3885
+ "epoch": 9.68,
3886
+ "learning_rate": 1.7838939857288482e-06,
3887
+ "loss": 0.0496,
3888
+ "step": 6330
3889
+ },
3890
+ {
3891
+ "epoch": 9.69,
3892
+ "learning_rate": 1.6989466530750936e-06,
3893
+ "loss": 0.0478,
3894
+ "step": 6340
3895
+ },
3896
+ {
3897
+ "epoch": 9.71,
3898
+ "learning_rate": 1.6139993204213387e-06,
3899
+ "loss": 0.0283,
3900
+ "step": 6350
3901
+ },
3902
+ {
3903
+ "epoch": 9.72,
3904
+ "learning_rate": 1.5290519877675841e-06,
3905
+ "loss": 0.0373,
3906
+ "step": 6360
3907
+ },
3908
+ {
3909
+ "epoch": 9.74,
3910
+ "learning_rate": 1.4441046551138295e-06,
3911
+ "loss": 0.0491,
3912
+ "step": 6370
3913
+ },
3914
+ {
3915
+ "epoch": 9.75,
3916
+ "learning_rate": 1.3591573224600747e-06,
3917
+ "loss": 0.0213,
3918
+ "step": 6380
3919
+ },
3920
+ {
3921
+ "epoch": 9.77,
3922
+ "learning_rate": 1.27420998980632e-06,
3923
+ "loss": 0.0414,
3924
+ "step": 6390
3925
+ },
3926
+ {
3927
+ "epoch": 9.78,
3928
+ "learning_rate": 1.1892626571525656e-06,
3929
+ "loss": 0.0573,
3930
+ "step": 6400
3931
+ },
3932
+ {
3933
+ "epoch": 9.8,
3934
+ "learning_rate": 1.1043153244988108e-06,
3935
+ "loss": 0.0467,
3936
+ "step": 6410
3937
+ },
3938
+ {
3939
+ "epoch": 9.81,
3940
+ "learning_rate": 1.0193679918450562e-06,
3941
+ "loss": 0.0545,
3942
+ "step": 6420
3943
+ },
3944
+ {
3945
+ "epoch": 9.83,
3946
+ "learning_rate": 9.344206591913014e-07,
3947
+ "loss": 0.042,
3948
+ "step": 6430
3949
+ },
3950
+ {
3951
+ "epoch": 9.85,
3952
+ "learning_rate": 8.494733265375468e-07,
3953
+ "loss": 0.0404,
3954
+ "step": 6440
3955
+ },
3956
+ {
3957
+ "epoch": 9.86,
3958
+ "learning_rate": 7.645259938837921e-07,
3959
+ "loss": 0.0524,
3960
+ "step": 6450
3961
+ },
3962
+ {
3963
+ "epoch": 9.88,
3964
+ "learning_rate": 6.795786612300373e-07,
3965
+ "loss": 0.0228,
3966
+ "step": 6460
3967
+ },
3968
+ {
3969
+ "epoch": 9.89,
3970
+ "learning_rate": 5.946313285762828e-07,
3971
+ "loss": 0.0444,
3972
+ "step": 6470
3973
+ },
3974
+ {
3975
+ "epoch": 9.91,
3976
+ "learning_rate": 5.096839959225281e-07,
3977
+ "loss": 0.0197,
3978
+ "step": 6480
3979
+ },
3980
+ {
3981
+ "epoch": 9.92,
3982
+ "learning_rate": 4.247366632687734e-07,
3983
+ "loss": 0.0297,
3984
+ "step": 6490
3985
+ },
3986
+ {
3987
+ "epoch": 9.94,
3988
+ "learning_rate": 3.3978933061501866e-07,
3989
+ "loss": 0.0422,
3990
+ "step": 6500
3991
+ },
3992
+ {
3993
+ "epoch": 9.95,
3994
+ "learning_rate": 2.5484199796126404e-07,
3995
+ "loss": 0.041,
3996
+ "step": 6510
3997
+ },
3998
+ {
3999
+ "epoch": 9.97,
4000
+ "learning_rate": 1.6989466530750933e-07,
4001
+ "loss": 0.0314,
4002
+ "step": 6520
4003
+ },
4004
+ {
4005
+ "epoch": 9.98,
4006
+ "learning_rate": 8.494733265375467e-08,
4007
+ "loss": 0.0388,
4008
+ "step": 6530
4009
+ },
4010
+ {
4011
+ "epoch": 10.0,
4012
+ "learning_rate": 0.0,
4013
+ "loss": 0.0562,
4014
+ "step": 6540
4015
+ },
4016
+ {
4017
+ "epoch": 10.0,
4018
+ "eval_accuracy": 0.7306590257879656,
4019
+ "eval_loss": 1.1663085222244263,
4020
+ "eval_runtime": 50.0529,
4021
+ "eval_samples_per_second": 104.589,
4022
+ "eval_steps_per_second": 13.086,
4023
+ "step": 6540
4024
+ },
4025
+ {
4026
+ "epoch": 10.0,
4027
+ "step": 6540,
4028
+ "total_flos": 1.6218250349213123e+19,
4029
+ "train_loss": 0.06548295821026195,
4030
+ "train_runtime": 2722.3253,
4031
+ "train_samples_per_second": 76.905,
4032
+ "train_steps_per_second": 2.402
4033
  }
4034
  ],
4035
  "logging_steps": 10,
4036
+ "max_steps": 6540,
4037
  "num_input_tokens_seen": 0,
4038
+ "num_train_epochs": 10,
4039
  "save_steps": 500,
4040
+ "total_flos": 1.6218250349213123e+19,
4041
  "train_batch_size": 8,
4042
  "trial_name": null,
4043
  "trial_params": null