BTX24 committed on
Commit
72d6ed0
1 Parent(s): a9896e5

End of training

Browse files
README.md CHANGED
@@ -4,11 +4,6 @@ license: apache-2.0
4
  base_model: facebook/convnextv2-base-22k-224
5
  tags:
6
  - generated_from_trainer
7
- metrics:
8
- - accuracy
9
- - f1
10
- - precision
11
- - recall
12
  model-index:
13
  - name: convnextv2-base-22k-224-finetuned-tekno24-highdata-90
14
  results: []
@@ -21,11 +16,16 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [facebook/convnextv2-base-22k-224](https://huggingface.co/facebook/convnextv2-base-22k-224) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 1.0280
25
- - Accuracy: 0.6129
26
- - F1: 0.6087
27
- - Precision: 0.6161
28
- - Recall: 0.6129
 
 
 
 
 
29
 
30
  ## Model description
31
 
@@ -56,42 +56,6 @@ The following hyperparameters were used during training:
56
  - num_epochs: 30
57
  - mixed_precision_training: Native AMP
58
 
59
- ### Training results
60
-
61
- | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
62
- |:-------------:|:-------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
63
- | 1.3277 | 0.9908 | 81 | 1.2870 | 0.4147 | 0.3280 | 0.3714 | 0.4147 |
64
- | 1.2024 | 1.9939 | 163 | 1.0890 | 0.4747 | 0.3907 | 0.4944 | 0.4747 |
65
- | 1.2067 | 2.9969 | 245 | 1.0601 | 0.5438 | 0.4965 | 0.5084 | 0.5438 |
66
- | 1.206 | 4.0 | 327 | 1.0143 | 0.5392 | 0.5159 | 0.5180 | 0.5392 |
67
- | 1.1049 | 4.9908 | 408 | 0.9688 | 0.5760 | 0.5451 | 0.5467 | 0.5760 |
68
- | 1.0931 | 5.9939 | 490 | 1.0351 | 0.5622 | 0.5562 | 0.5939 | 0.5622 |
69
- | 1.0752 | 6.9969 | 572 | 0.9370 | 0.5899 | 0.5592 | 0.5730 | 0.5899 |
70
- | 1.03 | 8.0 | 654 | 0.9417 | 0.5760 | 0.5510 | 0.5414 | 0.5760 |
71
- | 0.988 | 8.9908 | 735 | 0.8942 | 0.5991 | 0.5772 | 0.5819 | 0.5991 |
72
- | 0.9692 | 9.9939 | 817 | 0.9091 | 0.6083 | 0.5937 | 0.5981 | 0.6083 |
73
- | 0.9896 | 10.9969 | 899 | 0.8690 | 0.6037 | 0.5905 | 0.5937 | 0.6037 |
74
- | 0.9479 | 12.0 | 981 | 0.8705 | 0.6406 | 0.6268 | 0.6307 | 0.6406 |
75
- | 0.898 | 12.9908 | 1062 | 0.8569 | 0.6498 | 0.6440 | 0.6465 | 0.6498 |
76
- | 0.9101 | 13.9939 | 1144 | 0.8736 | 0.6129 | 0.6091 | 0.6179 | 0.6129 |
77
- | 0.8431 | 14.9969 | 1226 | 0.8684 | 0.6452 | 0.6419 | 0.6447 | 0.6452 |
78
- | 0.8187 | 16.0 | 1308 | 0.9032 | 0.6221 | 0.6199 | 0.6207 | 0.6221 |
79
- | 0.7614 | 16.9908 | 1389 | 0.9013 | 0.6359 | 0.6305 | 0.6434 | 0.6359 |
80
- | 0.725 | 17.9939 | 1471 | 0.9702 | 0.5991 | 0.5975 | 0.6072 | 0.5991 |
81
- | 0.6938 | 18.9969 | 1553 | 0.9598 | 0.6728 | 0.6660 | 0.6840 | 0.6728 |
82
- | 0.6761 | 20.0 | 1635 | 0.9886 | 0.6083 | 0.6112 | 0.6242 | 0.6083 |
83
- | 0.5865 | 20.9908 | 1716 | 0.9367 | 0.6498 | 0.6428 | 0.6432 | 0.6498 |
84
- | 0.5857 | 21.9939 | 1798 | 0.9694 | 0.6313 | 0.6322 | 0.6331 | 0.6313 |
85
- | 0.556 | 22.9969 | 1880 | 1.0212 | 0.6359 | 0.6296 | 0.6574 | 0.6359 |
86
- | 0.4871 | 24.0 | 1962 | 1.0328 | 0.5945 | 0.5879 | 0.5951 | 0.5945 |
87
- | 0.5254 | 24.9908 | 2043 | 1.0132 | 0.5945 | 0.5917 | 0.5968 | 0.5945 |
88
- | 0.5054 | 25.9939 | 2125 | 1.0385 | 0.5945 | 0.5944 | 0.5988 | 0.5945 |
89
- | 0.4706 | 26.9969 | 2207 | 1.0626 | 0.6037 | 0.5983 | 0.6100 | 0.6037 |
90
- | 0.418 | 28.0 | 2289 | 1.0531 | 0.5806 | 0.5774 | 0.5830 | 0.5806 |
91
- | 0.455 | 28.9908 | 2370 | 1.0340 | 0.6083 | 0.6039 | 0.6151 | 0.6083 |
92
- | 0.4414 | 29.7248 | 2430 | 1.0280 | 0.6129 | 0.6087 | 0.6161 | 0.6129 |
93
-
94
-
95
  ### Framework versions
96
 
97
  - Transformers 4.44.2
 
4
  base_model: facebook/convnextv2-base-22k-224
5
  tags:
6
  - generated_from_trainer
 
 
 
 
 
7
  model-index:
8
  - name: convnextv2-base-22k-224-finetuned-tekno24-highdata-90
9
  results: []
 
16
 
17
  This model is a fine-tuned version of [facebook/convnextv2-base-22k-224](https://huggingface.co/facebook/convnextv2-base-22k-224) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - eval_loss: 0.9598
20
+ - eval_model_preparation_time: 0.0061
21
+ - eval_accuracy: 0.6728
22
+ - eval_f1: 0.6660
23
+ - eval_precision: 0.6840
24
+ - eval_recall: 0.6728
25
+ - eval_runtime: 2.2537
26
+ - eval_samples_per_second: 96.288
27
+ - eval_steps_per_second: 6.212
28
+ - step: 0
29
 
30
  ## Model description
31
 
 
56
  - num_epochs: 30
57
  - mixed_precision_training: Native AMP
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  ### Framework versions
60
 
61
  - Transformers 4.44.2
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.724770642201836,
3
+ "eval_accuracy": 0.6728110599078341,
4
+ "eval_f1": 0.6659537595021466,
5
+ "eval_loss": 0.9598256349563599,
6
+ "eval_model_preparation_time": 0.0061,
7
+ "eval_precision": 0.683992662820187,
8
+ "eval_recall": 0.6728110599078341,
9
+ "eval_runtime": 2.2537,
10
+ "eval_samples_per_second": 96.288,
11
+ "eval_steps_per_second": 6.212,
12
+ "total_flos": 1.2301888231484006e+19,
13
+ "train_loss": 0.8217329954414211,
14
+ "train_runtime": 6060.3586,
15
+ "train_samples_per_second": 25.865,
16
+ "train_steps_per_second": 0.401
17
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.6728110599078341,
3
+ "eval_f1": 0.6659537595021466,
4
+ "eval_loss": 0.9598256349563599,
5
+ "eval_model_preparation_time": 0.0061,
6
+ "eval_precision": 0.683992662820187,
7
+ "eval_recall": 0.6728110599078341,
8
+ "eval_runtime": 2.2537,
9
+ "eval_samples_per_second": 96.288,
10
+ "eval_steps_per_second": 6.212
11
+ }
runs/Sep05_12-48-05_b998cf4c28a2/events.out.tfevents.1725546823.b998cf4c28a2.3248.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21de9c6c64584ecb044683410bb304ecaa3be7c9d8668846e7a24d805e08e417
3
+ size 549
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.724770642201836,
3
+ "total_flos": 1.2301888231484006e+19,
4
+ "train_loss": 0.8217329954414211,
5
+ "train_runtime": 6060.3586,
6
+ "train_samples_per_second": 25.865,
7
+ "train_steps_per_second": 0.401
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,2103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6728110599078341,
3
+ "best_model_checkpoint": "convnextv2-base-22k-224-finetuned-tekno24-highdata-90/checkpoint-1553",
4
+ "epoch": 29.724770642201836,
5
+ "eval_steps": 500,
6
+ "global_step": 2430,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.12232415902140673,
13
+ "grad_norm": 15.331552505493164,
14
+ "learning_rate": 2.05761316872428e-06,
15
+ "loss": 1.3954,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.24464831804281345,
20
+ "grad_norm": 13.410693168640137,
21
+ "learning_rate": 4.11522633744856e-06,
22
+ "loss": 1.3826,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.3669724770642202,
27
+ "grad_norm": 14.4244966506958,
28
+ "learning_rate": 5.967078189300412e-06,
29
+ "loss": 1.3692,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.4892966360856269,
34
+ "grad_norm": 13.09442138671875,
35
+ "learning_rate": 8.02469135802469e-06,
36
+ "loss": 1.3596,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.6116207951070336,
41
+ "grad_norm": 14.526571273803711,
42
+ "learning_rate": 1.008230452674897e-05,
43
+ "loss": 1.3519,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.7339449541284404,
48
+ "grad_norm": 21.34126091003418,
49
+ "learning_rate": 1.2139917695473252e-05,
50
+ "loss": 1.3338,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.8562691131498471,
55
+ "grad_norm": 17.52889060974121,
56
+ "learning_rate": 1.3991769547325103e-05,
57
+ "loss": 1.3236,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.9785932721712538,
62
+ "grad_norm": 17.072633743286133,
63
+ "learning_rate": 1.604938271604938e-05,
64
+ "loss": 1.3277,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.9908256880733946,
69
+ "eval_accuracy": 0.4147465437788018,
70
+ "eval_f1": 0.32804846883348265,
71
+ "eval_loss": 1.2870396375656128,
72
+ "eval_precision": 0.3713606931606389,
73
+ "eval_recall": 0.4147465437788018,
74
+ "eval_runtime": 2.1883,
75
+ "eval_samples_per_second": 99.163,
76
+ "eval_steps_per_second": 6.398,
77
+ "step": 81
78
+ },
79
+ {
80
+ "epoch": 1.1009174311926606,
81
+ "grad_norm": 22.833921432495117,
82
+ "learning_rate": 1.8106995884773663e-05,
83
+ "loss": 1.3091,
84
+ "step": 90
85
+ },
86
+ {
87
+ "epoch": 1.2232415902140672,
88
+ "grad_norm": 10.650703430175781,
89
+ "learning_rate": 2.016460905349794e-05,
90
+ "loss": 1.3123,
91
+ "step": 100
92
+ },
93
+ {
94
+ "epoch": 1.345565749235474,
95
+ "grad_norm": 9.893655776977539,
96
+ "learning_rate": 2.2222222222222223e-05,
97
+ "loss": 1.2944,
98
+ "step": 110
99
+ },
100
+ {
101
+ "epoch": 1.4678899082568808,
102
+ "grad_norm": 54.886756896972656,
103
+ "learning_rate": 2.3868312757201647e-05,
104
+ "loss": 1.2842,
105
+ "step": 120
106
+ },
107
+ {
108
+ "epoch": 1.5902140672782874,
109
+ "grad_norm": 16.818227767944336,
110
+ "learning_rate": 2.5925925925925925e-05,
111
+ "loss": 1.2572,
112
+ "step": 130
113
+ },
114
+ {
115
+ "epoch": 1.7125382262996942,
116
+ "grad_norm": 9.368868827819824,
117
+ "learning_rate": 2.7983539094650207e-05,
118
+ "loss": 1.2222,
119
+ "step": 140
120
+ },
121
+ {
122
+ "epoch": 1.834862385321101,
123
+ "grad_norm": 12.728253364562988,
124
+ "learning_rate": 3.0041152263374488e-05,
125
+ "loss": 1.2346,
126
+ "step": 150
127
+ },
128
+ {
129
+ "epoch": 1.9571865443425076,
130
+ "grad_norm": 13.562764167785645,
131
+ "learning_rate": 3.209876543209876e-05,
132
+ "loss": 1.2024,
133
+ "step": 160
134
+ },
135
+ {
136
+ "epoch": 1.9938837920489296,
137
+ "eval_accuracy": 0.47465437788018433,
138
+ "eval_f1": 0.3906684622928812,
139
+ "eval_loss": 1.0890263319015503,
140
+ "eval_precision": 0.49441907661085743,
141
+ "eval_recall": 0.47465437788018433,
142
+ "eval_runtime": 2.1621,
143
+ "eval_samples_per_second": 100.363,
144
+ "eval_steps_per_second": 6.475,
145
+ "step": 163
146
+ },
147
+ {
148
+ "epoch": 2.079510703363914,
149
+ "grad_norm": 10.129838943481445,
150
+ "learning_rate": 3.4156378600823045e-05,
151
+ "loss": 1.2231,
152
+ "step": 170
153
+ },
154
+ {
155
+ "epoch": 2.2018348623853212,
156
+ "grad_norm": 10.82052230834961,
157
+ "learning_rate": 3.6213991769547327e-05,
158
+ "loss": 1.2082,
159
+ "step": 180
160
+ },
161
+ {
162
+ "epoch": 2.324159021406728,
163
+ "grad_norm": 15.055948257446289,
164
+ "learning_rate": 3.82716049382716e-05,
165
+ "loss": 1.1827,
166
+ "step": 190
167
+ },
168
+ {
169
+ "epoch": 2.4464831804281344,
170
+ "grad_norm": 14.08010196685791,
171
+ "learning_rate": 4.032921810699588e-05,
172
+ "loss": 1.1564,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 2.5688073394495414,
177
+ "grad_norm": 19.530576705932617,
178
+ "learning_rate": 4.2386831275720165e-05,
179
+ "loss": 1.1508,
180
+ "step": 210
181
+ },
182
+ {
183
+ "epoch": 2.691131498470948,
184
+ "grad_norm": 17.710391998291016,
185
+ "learning_rate": 4.4444444444444447e-05,
186
+ "loss": 1.2407,
187
+ "step": 220
188
+ },
189
+ {
190
+ "epoch": 2.8134556574923546,
191
+ "grad_norm": 19.007694244384766,
192
+ "learning_rate": 4.650205761316873e-05,
193
+ "loss": 1.1333,
194
+ "step": 230
195
+ },
196
+ {
197
+ "epoch": 2.9357798165137616,
198
+ "grad_norm": 11.404845237731934,
199
+ "learning_rate": 4.855967078189301e-05,
200
+ "loss": 1.2067,
201
+ "step": 240
202
+ },
203
+ {
204
+ "epoch": 2.996941896024465,
205
+ "eval_accuracy": 0.543778801843318,
206
+ "eval_f1": 0.49651048113106194,
207
+ "eval_loss": 1.060142993927002,
208
+ "eval_precision": 0.5083802142557903,
209
+ "eval_recall": 0.543778801843318,
210
+ "eval_runtime": 2.1941,
211
+ "eval_samples_per_second": 98.904,
212
+ "eval_steps_per_second": 6.381,
213
+ "step": 245
214
+ },
215
+ {
216
+ "epoch": 3.058103975535168,
217
+ "grad_norm": 7.823658466339111,
218
+ "learning_rate": 4.993141289437586e-05,
219
+ "loss": 1.1512,
220
+ "step": 250
221
+ },
222
+ {
223
+ "epoch": 3.180428134556575,
224
+ "grad_norm": 7.272976875305176,
225
+ "learning_rate": 4.970278920896205e-05,
226
+ "loss": 1.2064,
227
+ "step": 260
228
+ },
229
+ {
230
+ "epoch": 3.302752293577982,
231
+ "grad_norm": 8.453497886657715,
232
+ "learning_rate": 4.9474165523548245e-05,
233
+ "loss": 1.1758,
234
+ "step": 270
235
+ },
236
+ {
237
+ "epoch": 3.4250764525993884,
238
+ "grad_norm": 6.063063144683838,
239
+ "learning_rate": 4.924554183813443e-05,
240
+ "loss": 1.1481,
241
+ "step": 280
242
+ },
243
+ {
244
+ "epoch": 3.547400611620795,
245
+ "grad_norm": 11.22547435760498,
246
+ "learning_rate": 4.901691815272063e-05,
247
+ "loss": 1.1818,
248
+ "step": 290
249
+ },
250
+ {
251
+ "epoch": 3.669724770642202,
252
+ "grad_norm": 8.580599784851074,
253
+ "learning_rate": 4.8788294467306815e-05,
254
+ "loss": 1.1345,
255
+ "step": 300
256
+ },
257
+ {
258
+ "epoch": 3.7920489296636086,
259
+ "grad_norm": 16.203880310058594,
260
+ "learning_rate": 4.855967078189301e-05,
261
+ "loss": 1.1451,
262
+ "step": 310
263
+ },
264
+ {
265
+ "epoch": 3.914373088685015,
266
+ "grad_norm": 7.94223165512085,
267
+ "learning_rate": 4.83310470964792e-05,
268
+ "loss": 1.206,
269
+ "step": 320
270
+ },
271
+ {
272
+ "epoch": 4.0,
273
+ "eval_accuracy": 0.5391705069124424,
274
+ "eval_f1": 0.515930279628824,
275
+ "eval_loss": 1.0142838954925537,
276
+ "eval_precision": 0.5180022054326838,
277
+ "eval_recall": 0.5391705069124424,
278
+ "eval_runtime": 2.1844,
279
+ "eval_samples_per_second": 99.339,
280
+ "eval_steps_per_second": 6.409,
281
+ "step": 327
282
+ },
283
+ {
284
+ "epoch": 4.036697247706422,
285
+ "grad_norm": 7.936139106750488,
286
+ "learning_rate": 4.810242341106539e-05,
287
+ "loss": 1.1749,
288
+ "step": 330
289
+ },
290
+ {
291
+ "epoch": 4.159021406727828,
292
+ "grad_norm": 6.124804496765137,
293
+ "learning_rate": 4.787379972565158e-05,
294
+ "loss": 1.1473,
295
+ "step": 340
296
+ },
297
+ {
298
+ "epoch": 4.281345565749236,
299
+ "grad_norm": 9.12104320526123,
300
+ "learning_rate": 4.764517604023777e-05,
301
+ "loss": 1.1809,
302
+ "step": 350
303
+ },
304
+ {
305
+ "epoch": 4.4036697247706424,
306
+ "grad_norm": 6.582239627838135,
307
+ "learning_rate": 4.741655235482396e-05,
308
+ "loss": 1.1728,
309
+ "step": 360
310
+ },
311
+ {
312
+ "epoch": 4.525993883792049,
313
+ "grad_norm": 7.613648891448975,
314
+ "learning_rate": 4.718792866941015e-05,
315
+ "loss": 1.1315,
316
+ "step": 370
317
+ },
318
+ {
319
+ "epoch": 4.648318042813456,
320
+ "grad_norm": 11.376667976379395,
321
+ "learning_rate": 4.6959304983996346e-05,
322
+ "loss": 1.1481,
323
+ "step": 380
324
+ },
325
+ {
326
+ "epoch": 4.770642201834862,
327
+ "grad_norm": 6.794886112213135,
328
+ "learning_rate": 4.6730681298582534e-05,
329
+ "loss": 1.0927,
330
+ "step": 390
331
+ },
332
+ {
333
+ "epoch": 4.892966360856269,
334
+ "grad_norm": 7.452205657958984,
335
+ "learning_rate": 4.650205761316873e-05,
336
+ "loss": 1.1049,
337
+ "step": 400
338
+ },
339
+ {
340
+ "epoch": 4.990825688073395,
341
+ "eval_accuracy": 0.576036866359447,
342
+ "eval_f1": 0.5450544928742728,
343
+ "eval_loss": 0.968787431716919,
344
+ "eval_precision": 0.5467442569565754,
345
+ "eval_recall": 0.576036866359447,
346
+ "eval_runtime": 2.1812,
347
+ "eval_samples_per_second": 99.488,
348
+ "eval_steps_per_second": 6.419,
349
+ "step": 408
350
+ },
351
+ {
352
+ "epoch": 5.015290519877676,
353
+ "grad_norm": 6.982997417449951,
354
+ "learning_rate": 4.6273433927754916e-05,
355
+ "loss": 1.1165,
356
+ "step": 410
357
+ },
358
+ {
359
+ "epoch": 5.137614678899083,
360
+ "grad_norm": 7.05279541015625,
361
+ "learning_rate": 4.604481024234111e-05,
362
+ "loss": 1.0755,
363
+ "step": 420
364
+ },
365
+ {
366
+ "epoch": 5.259938837920489,
367
+ "grad_norm": 9.04000186920166,
368
+ "learning_rate": 4.58161865569273e-05,
369
+ "loss": 1.1144,
370
+ "step": 430
371
+ },
372
+ {
373
+ "epoch": 5.382262996941896,
374
+ "grad_norm": 5.530284881591797,
375
+ "learning_rate": 4.558756287151349e-05,
376
+ "loss": 1.088,
377
+ "step": 440
378
+ },
379
+ {
380
+ "epoch": 5.504587155963303,
381
+ "grad_norm": 11.77010440826416,
382
+ "learning_rate": 4.535893918609968e-05,
383
+ "loss": 1.0805,
384
+ "step": 450
385
+ },
386
+ {
387
+ "epoch": 5.626911314984709,
388
+ "grad_norm": 9.45374870300293,
389
+ "learning_rate": 4.513031550068587e-05,
390
+ "loss": 1.1098,
391
+ "step": 460
392
+ },
393
+ {
394
+ "epoch": 5.749235474006117,
395
+ "grad_norm": 9.706932067871094,
396
+ "learning_rate": 4.4901691815272064e-05,
397
+ "loss": 1.0726,
398
+ "step": 470
399
+ },
400
+ {
401
+ "epoch": 5.871559633027523,
402
+ "grad_norm": 9.147931098937988,
403
+ "learning_rate": 4.467306812985825e-05,
404
+ "loss": 1.0934,
405
+ "step": 480
406
+ },
407
+ {
408
+ "epoch": 5.99388379204893,
409
+ "grad_norm": 6.612918376922607,
410
+ "learning_rate": 4.4444444444444447e-05,
411
+ "loss": 1.0931,
412
+ "step": 490
413
+ },
414
+ {
415
+ "epoch": 5.99388379204893,
416
+ "eval_accuracy": 0.5622119815668203,
417
+ "eval_f1": 0.5562044726083856,
418
+ "eval_loss": 1.0351340770721436,
419
+ "eval_precision": 0.5939199765927876,
420
+ "eval_recall": 0.5622119815668203,
421
+ "eval_runtime": 2.1678,
422
+ "eval_samples_per_second": 100.104,
423
+ "eval_steps_per_second": 6.458,
424
+ "step": 490
425
+ },
426
+ {
427
+ "epoch": 6.116207951070336,
428
+ "grad_norm": 10.358129501342773,
429
+ "learning_rate": 4.4215820759030634e-05,
430
+ "loss": 1.0469,
431
+ "step": 500
432
+ },
433
+ {
434
+ "epoch": 6.238532110091743,
435
+ "grad_norm": 5.924838542938232,
436
+ "learning_rate": 4.398719707361683e-05,
437
+ "loss": 1.0814,
438
+ "step": 510
439
+ },
440
+ {
441
+ "epoch": 6.36085626911315,
442
+ "grad_norm": 7.012035369873047,
443
+ "learning_rate": 4.3758573388203024e-05,
444
+ "loss": 1.1091,
445
+ "step": 520
446
+ },
447
+ {
448
+ "epoch": 6.483180428134556,
449
+ "grad_norm": 6.860468864440918,
450
+ "learning_rate": 4.352994970278921e-05,
451
+ "loss": 1.091,
452
+ "step": 530
453
+ },
454
+ {
455
+ "epoch": 6.605504587155964,
456
+ "grad_norm": 6.236204624176025,
457
+ "learning_rate": 4.3301326017375406e-05,
458
+ "loss": 1.1033,
459
+ "step": 540
460
+ },
461
+ {
462
+ "epoch": 6.72782874617737,
463
+ "grad_norm": 11.546574592590332,
464
+ "learning_rate": 4.3072702331961594e-05,
465
+ "loss": 1.067,
466
+ "step": 550
467
+ },
468
+ {
469
+ "epoch": 6.850152905198777,
470
+ "grad_norm": 10.157209396362305,
471
+ "learning_rate": 4.284407864654779e-05,
472
+ "loss": 1.054,
473
+ "step": 560
474
+ },
475
+ {
476
+ "epoch": 6.972477064220183,
477
+ "grad_norm": 7.421886444091797,
478
+ "learning_rate": 4.261545496113398e-05,
479
+ "loss": 1.0752,
480
+ "step": 570
481
+ },
482
+ {
483
+ "epoch": 6.996941896024465,
484
+ "eval_accuracy": 0.5898617511520737,
485
+ "eval_f1": 0.5592428280600323,
486
+ "eval_loss": 0.9370155930519104,
487
+ "eval_precision": 0.5730448203692986,
488
+ "eval_recall": 0.5898617511520737,
489
+ "eval_runtime": 2.1864,
490
+ "eval_samples_per_second": 99.25,
491
+ "eval_steps_per_second": 6.403,
492
+ "step": 572
493
+ },
494
+ {
495
+ "epoch": 7.09480122324159,
496
+ "grad_norm": 8.062494277954102,
497
+ "learning_rate": 4.2386831275720165e-05,
498
+ "loss": 1.0298,
499
+ "step": 580
500
+ },
501
+ {
502
+ "epoch": 7.217125382262997,
503
+ "grad_norm": 7.170814514160156,
504
+ "learning_rate": 4.215820759030636e-05,
505
+ "loss": 1.0451,
506
+ "step": 590
507
+ },
508
+ {
509
+ "epoch": 7.339449541284404,
510
+ "grad_norm": 7.2061662673950195,
511
+ "learning_rate": 4.192958390489255e-05,
512
+ "loss": 1.0586,
513
+ "step": 600
514
+ },
515
+ {
516
+ "epoch": 7.461773700305811,
517
+ "grad_norm": 5.97224760055542,
518
+ "learning_rate": 4.170096021947874e-05,
519
+ "loss": 1.0543,
520
+ "step": 610
521
+ },
522
+ {
523
+ "epoch": 7.584097859327217,
524
+ "grad_norm": 7.366073131561279,
525
+ "learning_rate": 4.147233653406493e-05,
526
+ "loss": 1.0903,
527
+ "step": 620
528
+ },
529
+ {
530
+ "epoch": 7.706422018348624,
531
+ "grad_norm": 6.186879634857178,
532
+ "learning_rate": 4.1243712848651125e-05,
533
+ "loss": 1.0919,
534
+ "step": 630
535
+ },
536
+ {
537
+ "epoch": 7.82874617737003,
538
+ "grad_norm": 8.208704948425293,
539
+ "learning_rate": 4.101508916323731e-05,
540
+ "loss": 1.1254,
541
+ "step": 640
542
+ },
543
+ {
544
+ "epoch": 7.951070336391437,
545
+ "grad_norm": 13.736848831176758,
546
+ "learning_rate": 4.078646547782351e-05,
547
+ "loss": 1.03,
548
+ "step": 650
549
+ },
550
+ {
551
+ "epoch": 8.0,
552
+ "eval_accuracy": 0.576036866359447,
553
+ "eval_f1": 0.5509975224553819,
554
+ "eval_loss": 0.9416872262954712,
555
+ "eval_precision": 0.5414011052041612,
556
+ "eval_recall": 0.576036866359447,
557
+ "eval_runtime": 2.1964,
558
+ "eval_samples_per_second": 98.796,
559
+ "eval_steps_per_second": 6.374,
560
+ "step": 654
561
+ },
562
+ {
563
+ "epoch": 8.073394495412844,
564
+ "grad_norm": 7.8366546630859375,
565
+ "learning_rate": 4.0557841792409695e-05,
566
+ "loss": 1.0581,
567
+ "step": 660
568
+ },
569
+ {
570
+ "epoch": 8.19571865443425,
571
+ "grad_norm": 6.876219272613525,
572
+ "learning_rate": 4.032921810699588e-05,
573
+ "loss": 1.04,
574
+ "step": 670
575
+ },
576
+ {
577
+ "epoch": 8.318042813455657,
578
+ "grad_norm": 7.931809902191162,
579
+ "learning_rate": 4.010059442158208e-05,
580
+ "loss": 1.0319,
581
+ "step": 680
582
+ },
583
+ {
584
+ "epoch": 8.440366972477065,
585
+ "grad_norm": 11.545510292053223,
586
+ "learning_rate": 3.9871970736168266e-05,
587
+ "loss": 1.0027,
588
+ "step": 690
589
+ },
590
+ {
591
+ "epoch": 8.562691131498472,
592
+ "grad_norm": 7.059225559234619,
593
+ "learning_rate": 3.964334705075446e-05,
594
+ "loss": 1.0164,
595
+ "step": 700
596
+ },
597
+ {
598
+ "epoch": 8.685015290519878,
599
+ "grad_norm": 6.870743751525879,
600
+ "learning_rate": 3.941472336534065e-05,
601
+ "loss": 1.0063,
602
+ "step": 710
603
+ },
604
+ {
605
+ "epoch": 8.807339449541285,
606
+ "grad_norm": 7.4264631271362305,
607
+ "learning_rate": 3.918609967992684e-05,
608
+ "loss": 1.0359,
609
+ "step": 720
610
+ },
611
+ {
612
+ "epoch": 8.929663608562691,
613
+ "grad_norm": 8.050666809082031,
614
+ "learning_rate": 3.895747599451303e-05,
615
+ "loss": 0.988,
616
+ "step": 730
617
+ },
618
+ {
619
+ "epoch": 8.990825688073395,
620
+ "eval_accuracy": 0.5990783410138248,
621
+ "eval_f1": 0.5772079574092954,
622
+ "eval_loss": 0.8942155838012695,
623
+ "eval_precision": 0.5818760258445811,
624
+ "eval_recall": 0.5990783410138248,
625
+ "eval_runtime": 2.178,
626
+ "eval_samples_per_second": 99.631,
627
+ "eval_steps_per_second": 6.428,
628
+ "step": 735
629
+ },
630
+ {
631
+ "epoch": 9.051987767584098,
632
+ "grad_norm": 6.86763858795166,
633
+ "learning_rate": 3.8728852309099226e-05,
634
+ "loss": 0.9927,
635
+ "step": 740
636
+ },
637
+ {
638
+ "epoch": 9.174311926605505,
639
+ "grad_norm": 7.786592483520508,
640
+ "learning_rate": 3.8500228623685414e-05,
641
+ "loss": 0.976,
642
+ "step": 750
643
+ },
644
+ {
645
+ "epoch": 9.296636085626911,
646
+ "grad_norm": 9.186685562133789,
647
+ "learning_rate": 3.82716049382716e-05,
648
+ "loss": 1.0239,
649
+ "step": 760
650
+ },
651
+ {
652
+ "epoch": 9.418960244648318,
653
+ "grad_norm": 8.555779457092285,
654
+ "learning_rate": 3.8042981252857796e-05,
655
+ "loss": 0.9526,
656
+ "step": 770
657
+ },
658
+ {
659
+ "epoch": 9.541284403669724,
660
+ "grad_norm": 9.215676307678223,
661
+ "learning_rate": 3.7814357567443984e-05,
662
+ "loss": 1.0258,
663
+ "step": 780
664
+ },
665
+ {
666
+ "epoch": 9.663608562691131,
667
+ "grad_norm": 7.625224590301514,
668
+ "learning_rate": 3.758573388203018e-05,
669
+ "loss": 1.0042,
670
+ "step": 790
671
+ },
672
+ {
673
+ "epoch": 9.785932721712538,
674
+ "grad_norm": 9.265899658203125,
675
+ "learning_rate": 3.7357110196616373e-05,
676
+ "loss": 1.008,
677
+ "step": 800
678
+ },
679
+ {
680
+ "epoch": 9.908256880733944,
681
+ "grad_norm": 7.059952735900879,
682
+ "learning_rate": 3.712848651120257e-05,
683
+ "loss": 0.9692,
684
+ "step": 810
685
+ },
686
+ {
687
+ "epoch": 9.99388379204893,
688
+ "eval_accuracy": 0.6082949308755761,
689
+ "eval_f1": 0.5936874933582452,
690
+ "eval_loss": 0.9091479182243347,
691
+ "eval_precision": 0.5980988666361812,
692
+ "eval_recall": 0.6082949308755761,
693
+ "eval_runtime": 2.1969,
694
+ "eval_samples_per_second": 98.777,
695
+ "eval_steps_per_second": 6.373,
696
+ "step": 817
697
+ },
698
+ {
699
+ "epoch": 10.030581039755353,
700
+ "grad_norm": 8.655425071716309,
701
+ "learning_rate": 3.6899862825788756e-05,
702
+ "loss": 1.0144,
703
+ "step": 820
704
+ },
705
+ {
706
+ "epoch": 10.15290519877676,
707
+ "grad_norm": 7.1922607421875,
708
+ "learning_rate": 3.6671239140374944e-05,
709
+ "loss": 0.9994,
710
+ "step": 830
711
+ },
712
+ {
713
+ "epoch": 10.275229357798166,
714
+ "grad_norm": 7.687134265899658,
715
+ "learning_rate": 3.644261545496114e-05,
716
+ "loss": 0.9879,
717
+ "step": 840
718
+ },
719
+ {
720
+ "epoch": 10.397553516819572,
721
+ "grad_norm": 9.036182403564453,
722
+ "learning_rate": 3.6213991769547327e-05,
723
+ "loss": 0.9218,
724
+ "step": 850
725
+ },
726
+ {
727
+ "epoch": 10.519877675840979,
728
+ "grad_norm": 11.018427848815918,
729
+ "learning_rate": 3.598536808413352e-05,
730
+ "loss": 1.0079,
731
+ "step": 860
732
+ },
733
+ {
734
+ "epoch": 10.642201834862385,
735
+ "grad_norm": 9.172164916992188,
736
+ "learning_rate": 3.575674439871971e-05,
737
+ "loss": 0.9795,
738
+ "step": 870
739
+ },
740
+ {
741
+ "epoch": 10.764525993883792,
742
+ "grad_norm": 10.210972785949707,
743
+ "learning_rate": 3.5528120713305904e-05,
744
+ "loss": 0.9675,
745
+ "step": 880
746
+ },
747
+ {
748
+ "epoch": 10.886850152905199,
749
+ "grad_norm": 9.860912322998047,
750
+ "learning_rate": 3.529949702789209e-05,
751
+ "loss": 0.9896,
752
+ "step": 890
753
+ },
754
+ {
755
+ "epoch": 10.996941896024465,
756
+ "eval_accuracy": 0.6036866359447005,
757
+ "eval_f1": 0.5904668698923761,
758
+ "eval_loss": 0.8689674139022827,
759
+ "eval_precision": 0.5936795435008436,
760
+ "eval_recall": 0.6036866359447005,
761
+ "eval_runtime": 2.1973,
762
+ "eval_samples_per_second": 98.756,
763
+ "eval_steps_per_second": 6.371,
764
+ "step": 899
765
+ },
766
+ {
767
+ "epoch": 11.009174311926605,
768
+ "grad_norm": 9.831244468688965,
769
+ "learning_rate": 3.5070873342478286e-05,
770
+ "loss": 0.9261,
771
+ "step": 900
772
+ },
773
+ {
774
+ "epoch": 11.131498470948012,
775
+ "grad_norm": 11.262879371643066,
776
+ "learning_rate": 3.4842249657064474e-05,
777
+ "loss": 0.9191,
778
+ "step": 910
779
+ },
780
+ {
781
+ "epoch": 11.253822629969418,
782
+ "grad_norm": 9.582681655883789,
783
+ "learning_rate": 3.461362597165066e-05,
784
+ "loss": 0.9026,
785
+ "step": 920
786
+ },
787
+ {
788
+ "epoch": 11.376146788990825,
789
+ "grad_norm": 9.12176513671875,
790
+ "learning_rate": 3.438500228623686e-05,
791
+ "loss": 0.9882,
792
+ "step": 930
793
+ },
794
+ {
795
+ "epoch": 11.498470948012232,
796
+ "grad_norm": 10.87016773223877,
797
+ "learning_rate": 3.4156378600823045e-05,
798
+ "loss": 0.9379,
799
+ "step": 940
800
+ },
801
+ {
802
+ "epoch": 11.62079510703364,
803
+ "grad_norm": 8.557551383972168,
804
+ "learning_rate": 3.392775491540924e-05,
805
+ "loss": 0.9312,
806
+ "step": 950
807
+ },
808
+ {
809
+ "epoch": 11.743119266055047,
810
+ "grad_norm": 8.361717224121094,
811
+ "learning_rate": 3.369913122999543e-05,
812
+ "loss": 0.9085,
813
+ "step": 960
814
+ },
815
+ {
816
+ "epoch": 11.865443425076453,
817
+ "grad_norm": 7.925179958343506,
818
+ "learning_rate": 3.347050754458162e-05,
819
+ "loss": 1.0008,
820
+ "step": 970
821
+ },
822
+ {
823
+ "epoch": 11.98776758409786,
824
+ "grad_norm": 9.740839004516602,
825
+ "learning_rate": 3.324188385916781e-05,
826
+ "loss": 0.9479,
827
+ "step": 980
828
+ },
829
+ {
830
+ "epoch": 12.0,
831
+ "eval_accuracy": 0.6405529953917051,
832
+ "eval_f1": 0.6268359763707001,
833
+ "eval_loss": 0.8705018162727356,
834
+ "eval_precision": 0.6307341542199126,
835
+ "eval_recall": 0.6405529953917051,
836
+ "eval_runtime": 2.1984,
837
+ "eval_samples_per_second": 98.71,
838
+ "eval_steps_per_second": 6.368,
839
+ "step": 981
840
+ },
841
+ {
842
+ "epoch": 12.110091743119266,
843
+ "grad_norm": 9.437625885009766,
844
+ "learning_rate": 3.3013260173754005e-05,
845
+ "loss": 0.8172,
846
+ "step": 990
847
+ },
848
+ {
849
+ "epoch": 12.232415902140673,
850
+ "grad_norm": 8.812919616699219,
851
+ "learning_rate": 3.278463648834019e-05,
852
+ "loss": 0.8996,
853
+ "step": 1000
854
+ },
855
+ {
856
+ "epoch": 12.35474006116208,
857
+ "grad_norm": 8.750353813171387,
858
+ "learning_rate": 3.255601280292638e-05,
859
+ "loss": 0.9655,
860
+ "step": 1010
861
+ },
862
+ {
863
+ "epoch": 12.477064220183486,
864
+ "grad_norm": 7.671780586242676,
865
+ "learning_rate": 3.2327389117512575e-05,
866
+ "loss": 0.9139,
867
+ "step": 1020
868
+ },
869
+ {
870
+ "epoch": 12.599388379204893,
871
+ "grad_norm": 9.657299041748047,
872
+ "learning_rate": 3.209876543209876e-05,
873
+ "loss": 0.9269,
874
+ "step": 1030
875
+ },
876
+ {
877
+ "epoch": 12.7217125382263,
878
+ "grad_norm": 9.090998649597168,
879
+ "learning_rate": 3.187014174668496e-05,
880
+ "loss": 0.8867,
881
+ "step": 1040
882
+ },
883
+ {
884
+ "epoch": 12.844036697247706,
885
+ "grad_norm": 9.977676391601562,
886
+ "learning_rate": 3.1641518061271146e-05,
887
+ "loss": 0.89,
888
+ "step": 1050
889
+ },
890
+ {
891
+ "epoch": 12.966360856269112,
892
+ "grad_norm": 10.456171035766602,
893
+ "learning_rate": 3.141289437585734e-05,
894
+ "loss": 0.898,
895
+ "step": 1060
896
+ },
897
+ {
898
+ "epoch": 12.990825688073395,
899
+ "eval_accuracy": 0.6497695852534562,
900
+ "eval_f1": 0.6439767898106951,
901
+ "eval_loss": 0.8568853735923767,
902
+ "eval_precision": 0.6465264959675692,
903
+ "eval_recall": 0.6497695852534562,
904
+ "eval_runtime": 2.1871,
905
+ "eval_samples_per_second": 99.216,
906
+ "eval_steps_per_second": 6.401,
907
+ "step": 1062
908
+ },
909
+ {
910
+ "epoch": 13.08868501529052,
911
+ "grad_norm": 9.029878616333008,
912
+ "learning_rate": 3.118427069044353e-05,
913
+ "loss": 0.9008,
914
+ "step": 1070
915
+ },
916
+ {
917
+ "epoch": 13.211009174311927,
918
+ "grad_norm": 11.082198143005371,
919
+ "learning_rate": 3.095564700502972e-05,
920
+ "loss": 0.8678,
921
+ "step": 1080
922
+ },
923
+ {
924
+ "epoch": 13.333333333333334,
925
+ "grad_norm": 8.106987953186035,
926
+ "learning_rate": 3.072702331961592e-05,
927
+ "loss": 0.8724,
928
+ "step": 1090
929
+ },
930
+ {
931
+ "epoch": 13.45565749235474,
932
+ "grad_norm": 9.417874336242676,
933
+ "learning_rate": 3.0498399634202106e-05,
934
+ "loss": 0.8435,
935
+ "step": 1100
936
+ },
937
+ {
938
+ "epoch": 13.577981651376147,
939
+ "grad_norm": 10.039156913757324,
940
+ "learning_rate": 3.0269775948788297e-05,
941
+ "loss": 0.7984,
942
+ "step": 1110
943
+ },
944
+ {
945
+ "epoch": 13.700305810397554,
946
+ "grad_norm": 10.906753540039062,
947
+ "learning_rate": 3.0041152263374488e-05,
948
+ "loss": 0.8578,
949
+ "step": 1120
950
+ },
951
+ {
952
+ "epoch": 13.82262996941896,
953
+ "grad_norm": 9.698974609375,
954
+ "learning_rate": 2.981252857796068e-05,
955
+ "loss": 0.8699,
956
+ "step": 1130
957
+ },
958
+ {
959
+ "epoch": 13.944954128440367,
960
+ "grad_norm": 9.868678092956543,
961
+ "learning_rate": 2.958390489254687e-05,
962
+ "loss": 0.9101,
963
+ "step": 1140
964
+ },
965
+ {
966
+ "epoch": 13.99388379204893,
967
+ "eval_accuracy": 0.6129032258064516,
968
+ "eval_f1": 0.6090804206647874,
969
+ "eval_loss": 0.8736193180084229,
970
+ "eval_precision": 0.6179152160718983,
971
+ "eval_recall": 0.6129032258064516,
972
+ "eval_runtime": 2.1919,
973
+ "eval_samples_per_second": 99.002,
974
+ "eval_steps_per_second": 6.387,
975
+ "step": 1144
976
+ },
977
+ {
978
+ "epoch": 14.067278287461773,
979
+ "grad_norm": 8.79443645477295,
980
+ "learning_rate": 2.9355281207133062e-05,
981
+ "loss": 0.8439,
982
+ "step": 1150
983
+ },
984
+ {
985
+ "epoch": 14.18960244648318,
986
+ "grad_norm": 9.417186737060547,
987
+ "learning_rate": 2.9126657521719253e-05,
988
+ "loss": 0.8763,
989
+ "step": 1160
990
+ },
991
+ {
992
+ "epoch": 14.311926605504587,
993
+ "grad_norm": 11.611254692077637,
994
+ "learning_rate": 2.8898033836305445e-05,
995
+ "loss": 0.8305,
996
+ "step": 1170
997
+ },
998
+ {
999
+ "epoch": 14.434250764525993,
1000
+ "grad_norm": 10.025824546813965,
1001
+ "learning_rate": 2.8669410150891636e-05,
1002
+ "loss": 0.7743,
1003
+ "step": 1180
1004
+ },
1005
+ {
1006
+ "epoch": 14.5565749235474,
1007
+ "grad_norm": 10.79056453704834,
1008
+ "learning_rate": 2.8440786465477824e-05,
1009
+ "loss": 0.8414,
1010
+ "step": 1190
1011
+ },
1012
+ {
1013
+ "epoch": 14.678899082568808,
1014
+ "grad_norm": 8.00452709197998,
1015
+ "learning_rate": 2.8212162780064015e-05,
1016
+ "loss": 0.8934,
1017
+ "step": 1200
1018
+ },
1019
+ {
1020
+ "epoch": 14.801223241590215,
1021
+ "grad_norm": 11.359430313110352,
1022
+ "learning_rate": 2.7983539094650207e-05,
1023
+ "loss": 0.8844,
1024
+ "step": 1210
1025
+ },
1026
+ {
1027
+ "epoch": 14.923547400611621,
1028
+ "grad_norm": 10.003506660461426,
1029
+ "learning_rate": 2.7754915409236398e-05,
1030
+ "loss": 0.8431,
1031
+ "step": 1220
1032
+ },
1033
+ {
1034
+ "epoch": 14.996941896024465,
1035
+ "eval_accuracy": 0.6451612903225806,
1036
+ "eval_f1": 0.6418579664744979,
1037
+ "eval_loss": 0.8684141039848328,
1038
+ "eval_precision": 0.6447143927349024,
1039
+ "eval_recall": 0.6451612903225806,
1040
+ "eval_runtime": 2.1833,
1041
+ "eval_samples_per_second": 99.392,
1042
+ "eval_steps_per_second": 6.412,
1043
+ "step": 1226
1044
+ },
1045
+ {
1046
+ "epoch": 15.045871559633028,
1047
+ "grad_norm": 8.596139907836914,
1048
+ "learning_rate": 2.752629172382259e-05,
1049
+ "loss": 0.8224,
1050
+ "step": 1230
1051
+ },
1052
+ {
1053
+ "epoch": 15.168195718654435,
1054
+ "grad_norm": 11.239164352416992,
1055
+ "learning_rate": 2.729766803840878e-05,
1056
+ "loss": 0.8407,
1057
+ "step": 1240
1058
+ },
1059
+ {
1060
+ "epoch": 15.290519877675841,
1061
+ "grad_norm": 13.581149101257324,
1062
+ "learning_rate": 2.7069044352994972e-05,
1063
+ "loss": 0.7794,
1064
+ "step": 1250
1065
+ },
1066
+ {
1067
+ "epoch": 15.412844036697248,
1068
+ "grad_norm": 10.948393821716309,
1069
+ "learning_rate": 2.6840420667581163e-05,
1070
+ "loss": 0.8199,
1071
+ "step": 1260
1072
+ },
1073
+ {
1074
+ "epoch": 15.535168195718654,
1075
+ "grad_norm": 9.145750999450684,
1076
+ "learning_rate": 2.6611796982167354e-05,
1077
+ "loss": 0.7257,
1078
+ "step": 1270
1079
+ },
1080
+ {
1081
+ "epoch": 15.65749235474006,
1082
+ "grad_norm": 11.599444389343262,
1083
+ "learning_rate": 2.6383173296753542e-05,
1084
+ "loss": 0.7847,
1085
+ "step": 1280
1086
+ },
1087
+ {
1088
+ "epoch": 15.779816513761467,
1089
+ "grad_norm": 11.259026527404785,
1090
+ "learning_rate": 2.6154549611339734e-05,
1091
+ "loss": 0.8404,
1092
+ "step": 1290
1093
+ },
1094
+ {
1095
+ "epoch": 15.902140672782874,
1096
+ "grad_norm": 11.746826171875,
1097
+ "learning_rate": 2.5925925925925925e-05,
1098
+ "loss": 0.8187,
1099
+ "step": 1300
1100
+ },
1101
+ {
1102
+ "epoch": 16.0,
1103
+ "eval_accuracy": 0.6221198156682027,
1104
+ "eval_f1": 0.6198809621231663,
1105
+ "eval_loss": 0.9032032489776611,
1106
+ "eval_precision": 0.6206784143654135,
1107
+ "eval_recall": 0.6221198156682027,
1108
+ "eval_runtime": 2.1909,
1109
+ "eval_samples_per_second": 99.047,
1110
+ "eval_steps_per_second": 6.39,
1111
+ "step": 1308
1112
+ },
1113
+ {
1114
+ "epoch": 16.02446483180428,
1115
+ "grad_norm": 9.55320930480957,
1116
+ "learning_rate": 2.5697302240512116e-05,
1117
+ "loss": 0.7986,
1118
+ "step": 1310
1119
+ },
1120
+ {
1121
+ "epoch": 16.146788990825687,
1122
+ "grad_norm": 10.655790328979492,
1123
+ "learning_rate": 2.5468678555098308e-05,
1124
+ "loss": 0.7589,
1125
+ "step": 1320
1126
+ },
1127
+ {
1128
+ "epoch": 16.269113149847094,
1129
+ "grad_norm": 10.707428932189941,
1130
+ "learning_rate": 2.52400548696845e-05,
1131
+ "loss": 0.7315,
1132
+ "step": 1330
1133
+ },
1134
+ {
1135
+ "epoch": 16.3914373088685,
1136
+ "grad_norm": 12.252084732055664,
1137
+ "learning_rate": 2.501143118427069e-05,
1138
+ "loss": 0.7892,
1139
+ "step": 1340
1140
+ },
1141
+ {
1142
+ "epoch": 16.513761467889907,
1143
+ "grad_norm": 9.307701110839844,
1144
+ "learning_rate": 2.4782807498856885e-05,
1145
+ "loss": 0.7698,
1146
+ "step": 1350
1147
+ },
1148
+ {
1149
+ "epoch": 16.636085626911314,
1150
+ "grad_norm": 10.571149826049805,
1151
+ "learning_rate": 2.4554183813443073e-05,
1152
+ "loss": 0.7547,
1153
+ "step": 1360
1154
+ },
1155
+ {
1156
+ "epoch": 16.75840978593272,
1157
+ "grad_norm": 11.456555366516113,
1158
+ "learning_rate": 2.4325560128029264e-05,
1159
+ "loss": 0.7723,
1160
+ "step": 1370
1161
+ },
1162
+ {
1163
+ "epoch": 16.88073394495413,
1164
+ "grad_norm": 11.944501876831055,
1165
+ "learning_rate": 2.4096936442615455e-05,
1166
+ "loss": 0.7614,
1167
+ "step": 1380
1168
+ },
1169
+ {
1170
+ "epoch": 16.990825688073393,
1171
+ "eval_accuracy": 0.6359447004608295,
1172
+ "eval_f1": 0.6305311465399625,
1173
+ "eval_loss": 0.9012994170188904,
1174
+ "eval_precision": 0.6433955302551128,
1175
+ "eval_recall": 0.6359447004608295,
1176
+ "eval_runtime": 2.1782,
1177
+ "eval_samples_per_second": 99.624,
1178
+ "eval_steps_per_second": 6.427,
1179
+ "step": 1389
1180
+ },
1181
+ {
1182
+ "epoch": 17.003058103975537,
1183
+ "grad_norm": 11.058562278747559,
1184
+ "learning_rate": 2.3868312757201647e-05,
1185
+ "loss": 0.7566,
1186
+ "step": 1390
1187
+ },
1188
+ {
1189
+ "epoch": 17.125382262996943,
1190
+ "grad_norm": 12.137784004211426,
1191
+ "learning_rate": 2.3639689071787838e-05,
1192
+ "loss": 0.6842,
1193
+ "step": 1400
1194
+ },
1195
+ {
1196
+ "epoch": 17.24770642201835,
1197
+ "grad_norm": 12.046640396118164,
1198
+ "learning_rate": 2.341106538637403e-05,
1199
+ "loss": 0.7126,
1200
+ "step": 1410
1201
+ },
1202
+ {
1203
+ "epoch": 17.370030581039757,
1204
+ "grad_norm": 13.35624885559082,
1205
+ "learning_rate": 2.318244170096022e-05,
1206
+ "loss": 0.721,
1207
+ "step": 1420
1208
+ },
1209
+ {
1210
+ "epoch": 17.492354740061163,
1211
+ "grad_norm": 10.566619873046875,
1212
+ "learning_rate": 2.2953818015546412e-05,
1213
+ "loss": 0.7424,
1214
+ "step": 1430
1215
+ },
1216
+ {
1217
+ "epoch": 17.61467889908257,
1218
+ "grad_norm": 11.238773345947266,
1219
+ "learning_rate": 2.2725194330132603e-05,
1220
+ "loss": 0.7375,
1221
+ "step": 1440
1222
+ },
1223
+ {
1224
+ "epoch": 17.737003058103976,
1225
+ "grad_norm": 11.583772659301758,
1226
+ "learning_rate": 2.2496570644718794e-05,
1227
+ "loss": 0.7177,
1228
+ "step": 1450
1229
+ },
1230
+ {
1231
+ "epoch": 17.859327217125383,
1232
+ "grad_norm": 11.983872413635254,
1233
+ "learning_rate": 2.2267946959304986e-05,
1234
+ "loss": 0.7393,
1235
+ "step": 1460
1236
+ },
1237
+ {
1238
+ "epoch": 17.98165137614679,
1239
+ "grad_norm": 9.852224349975586,
1240
+ "learning_rate": 2.2039323273891177e-05,
1241
+ "loss": 0.725,
1242
+ "step": 1470
1243
+ },
1244
+ {
1245
+ "epoch": 17.99388379204893,
1246
+ "eval_accuracy": 0.5990783410138248,
1247
+ "eval_f1": 0.5975030533834751,
1248
+ "eval_loss": 0.9701642394065857,
1249
+ "eval_precision": 0.6072043560801691,
1250
+ "eval_recall": 0.5990783410138248,
1251
+ "eval_runtime": 2.1744,
1252
+ "eval_samples_per_second": 99.798,
1253
+ "eval_steps_per_second": 6.439,
1254
+ "step": 1471
1255
+ },
1256
+ {
1257
+ "epoch": 18.103975535168196,
1258
+ "grad_norm": 10.969733238220215,
1259
+ "learning_rate": 2.1810699588477368e-05,
1260
+ "loss": 0.6728,
1261
+ "step": 1480
1262
+ },
1263
+ {
1264
+ "epoch": 18.226299694189603,
1265
+ "grad_norm": 11.671346664428711,
1266
+ "learning_rate": 2.158207590306356e-05,
1267
+ "loss": 0.6974,
1268
+ "step": 1490
1269
+ },
1270
+ {
1271
+ "epoch": 18.34862385321101,
1272
+ "grad_norm": 8.975910186767578,
1273
+ "learning_rate": 2.135345221764975e-05,
1274
+ "loss": 0.6503,
1275
+ "step": 1500
1276
+ },
1277
+ {
1278
+ "epoch": 18.470948012232416,
1279
+ "grad_norm": 13.026418685913086,
1280
+ "learning_rate": 2.1124828532235942e-05,
1281
+ "loss": 0.6879,
1282
+ "step": 1510
1283
+ },
1284
+ {
1285
+ "epoch": 18.593272171253822,
1286
+ "grad_norm": 15.196861267089844,
1287
+ "learning_rate": 2.089620484682213e-05,
1288
+ "loss": 0.6912,
1289
+ "step": 1520
1290
+ },
1291
+ {
1292
+ "epoch": 18.71559633027523,
1293
+ "grad_norm": 8.060693740844727,
1294
+ "learning_rate": 2.066758116140832e-05,
1295
+ "loss": 0.7287,
1296
+ "step": 1530
1297
+ },
1298
+ {
1299
+ "epoch": 18.837920489296636,
1300
+ "grad_norm": 10.259760856628418,
1301
+ "learning_rate": 2.0438957475994513e-05,
1302
+ "loss": 0.6657,
1303
+ "step": 1540
1304
+ },
1305
+ {
1306
+ "epoch": 18.960244648318042,
1307
+ "grad_norm": 10.083440780639648,
1308
+ "learning_rate": 2.0210333790580704e-05,
1309
+ "loss": 0.6938,
1310
+ "step": 1550
1311
+ },
1312
+ {
1313
+ "epoch": 18.996941896024463,
1314
+ "eval_accuracy": 0.6728110599078341,
1315
+ "eval_f1": 0.6659537595021466,
1316
+ "eval_loss": 0.9598256349563599,
1317
+ "eval_precision": 0.683992662820187,
1318
+ "eval_recall": 0.6728110599078341,
1319
+ "eval_runtime": 2.1821,
1320
+ "eval_samples_per_second": 99.445,
1321
+ "eval_steps_per_second": 6.416,
1322
+ "step": 1553
1323
+ },
1324
+ {
1325
+ "epoch": 19.08256880733945,
1326
+ "grad_norm": 13.321717262268066,
1327
+ "learning_rate": 1.9981710105166895e-05,
1328
+ "loss": 0.6849,
1329
+ "step": 1560
1330
+ },
1331
+ {
1332
+ "epoch": 19.204892966360855,
1333
+ "grad_norm": 9.200907707214355,
1334
+ "learning_rate": 1.9753086419753087e-05,
1335
+ "loss": 0.6397,
1336
+ "step": 1570
1337
+ },
1338
+ {
1339
+ "epoch": 19.327217125382262,
1340
+ "grad_norm": 12.469816207885742,
1341
+ "learning_rate": 1.9524462734339278e-05,
1342
+ "loss": 0.6554,
1343
+ "step": 1580
1344
+ },
1345
+ {
1346
+ "epoch": 19.44954128440367,
1347
+ "grad_norm": 10.692110061645508,
1348
+ "learning_rate": 1.929583904892547e-05,
1349
+ "loss": 0.656,
1350
+ "step": 1590
1351
+ },
1352
+ {
1353
+ "epoch": 19.571865443425075,
1354
+ "grad_norm": 11.429883003234863,
1355
+ "learning_rate": 1.906721536351166e-05,
1356
+ "loss": 0.6959,
1357
+ "step": 1600
1358
+ },
1359
+ {
1360
+ "epoch": 19.69418960244648,
1361
+ "grad_norm": 10.087126731872559,
1362
+ "learning_rate": 1.8838591678097852e-05,
1363
+ "loss": 0.6632,
1364
+ "step": 1610
1365
+ },
1366
+ {
1367
+ "epoch": 19.81651376146789,
1368
+ "grad_norm": 16.846824645996094,
1369
+ "learning_rate": 1.8609967992684043e-05,
1370
+ "loss": 0.6307,
1371
+ "step": 1620
1372
+ },
1373
+ {
1374
+ "epoch": 19.938837920489295,
1375
+ "grad_norm": 12.659772872924805,
1376
+ "learning_rate": 1.8381344307270234e-05,
1377
+ "loss": 0.6761,
1378
+ "step": 1630
1379
+ },
1380
+ {
1381
+ "epoch": 20.0,
1382
+ "eval_accuracy": 0.6082949308755761,
1383
+ "eval_f1": 0.6112076266773331,
1384
+ "eval_loss": 0.9886434078216553,
1385
+ "eval_precision": 0.6242090052989822,
1386
+ "eval_recall": 0.6082949308755761,
1387
+ "eval_runtime": 2.1998,
1388
+ "eval_samples_per_second": 98.647,
1389
+ "eval_steps_per_second": 6.364,
1390
+ "step": 1635
1391
+ },
1392
+ {
1393
+ "epoch": 20.061162079510705,
1394
+ "grad_norm": 11.294504165649414,
1395
+ "learning_rate": 1.8152720621856426e-05,
1396
+ "loss": 0.6726,
1397
+ "step": 1640
1398
+ },
1399
+ {
1400
+ "epoch": 20.18348623853211,
1401
+ "grad_norm": 10.856670379638672,
1402
+ "learning_rate": 1.7924096936442617e-05,
1403
+ "loss": 0.626,
1404
+ "step": 1650
1405
+ },
1406
+ {
1407
+ "epoch": 20.30581039755352,
1408
+ "grad_norm": 13.345258712768555,
1409
+ "learning_rate": 1.769547325102881e-05,
1410
+ "loss": 0.6446,
1411
+ "step": 1660
1412
+ },
1413
+ {
1414
+ "epoch": 20.428134556574925,
1415
+ "grad_norm": 9.459267616271973,
1416
+ "learning_rate": 1.7466849565615e-05,
1417
+ "loss": 0.6125,
1418
+ "step": 1670
1419
+ },
1420
+ {
1421
+ "epoch": 20.55045871559633,
1422
+ "grad_norm": 14.87169361114502,
1423
+ "learning_rate": 1.723822588020119e-05,
1424
+ "loss": 0.575,
1425
+ "step": 1680
1426
+ },
1427
+ {
1428
+ "epoch": 20.672782874617738,
1429
+ "grad_norm": 14.521281242370605,
1430
+ "learning_rate": 1.700960219478738e-05,
1431
+ "loss": 0.6999,
1432
+ "step": 1690
1433
+ },
1434
+ {
1435
+ "epoch": 20.795107033639145,
1436
+ "grad_norm": 12.60116195678711,
1437
+ "learning_rate": 1.678097850937357e-05,
1438
+ "loss": 0.5862,
1439
+ "step": 1700
1440
+ },
1441
+ {
1442
+ "epoch": 20.91743119266055,
1443
+ "grad_norm": 10.943224906921387,
1444
+ "learning_rate": 1.655235482395976e-05,
1445
+ "loss": 0.5865,
1446
+ "step": 1710
1447
+ },
1448
+ {
1449
+ "epoch": 20.990825688073393,
1450
+ "eval_accuracy": 0.6497695852534562,
1451
+ "eval_f1": 0.6428003832696357,
1452
+ "eval_loss": 0.9367409348487854,
1453
+ "eval_precision": 0.6431866677852993,
1454
+ "eval_recall": 0.6497695852534562,
1455
+ "eval_runtime": 2.1726,
1456
+ "eval_samples_per_second": 99.881,
1457
+ "eval_steps_per_second": 6.444,
1458
+ "step": 1716
1459
+ },
1460
+ {
1461
+ "epoch": 21.039755351681958,
1462
+ "grad_norm": 11.365513801574707,
1463
+ "learning_rate": 1.6323731138545953e-05,
1464
+ "loss": 0.6488,
1465
+ "step": 1720
1466
+ },
1467
+ {
1468
+ "epoch": 21.162079510703364,
1469
+ "grad_norm": 8.919206619262695,
1470
+ "learning_rate": 1.6095107453132147e-05,
1471
+ "loss": 0.6201,
1472
+ "step": 1730
1473
+ },
1474
+ {
1475
+ "epoch": 21.28440366972477,
1476
+ "grad_norm": 12.377992630004883,
1477
+ "learning_rate": 1.586648376771834e-05,
1478
+ "loss": 0.5806,
1479
+ "step": 1740
1480
+ },
1481
+ {
1482
+ "epoch": 21.406727828746178,
1483
+ "grad_norm": 12.146440505981445,
1484
+ "learning_rate": 1.563786008230453e-05,
1485
+ "loss": 0.5506,
1486
+ "step": 1750
1487
+ },
1488
+ {
1489
+ "epoch": 21.529051987767584,
1490
+ "grad_norm": 11.200637817382812,
1491
+ "learning_rate": 1.5409236396890718e-05,
1492
+ "loss": 0.5667,
1493
+ "step": 1760
1494
+ },
1495
+ {
1496
+ "epoch": 21.65137614678899,
1497
+ "grad_norm": 14.65993595123291,
1498
+ "learning_rate": 1.5180612711476911e-05,
1499
+ "loss": 0.5936,
1500
+ "step": 1770
1501
+ },
1502
+ {
1503
+ "epoch": 21.773700305810397,
1504
+ "grad_norm": 14.228142738342285,
1505
+ "learning_rate": 1.49519890260631e-05,
1506
+ "loss": 0.5791,
1507
+ "step": 1780
1508
+ },
1509
+ {
1510
+ "epoch": 21.896024464831804,
1511
+ "grad_norm": 12.14127254486084,
1512
+ "learning_rate": 1.4723365340649292e-05,
1513
+ "loss": 0.5857,
1514
+ "step": 1790
1515
+ },
1516
+ {
1517
+ "epoch": 21.99388379204893,
1518
+ "eval_accuracy": 0.631336405529954,
1519
+ "eval_f1": 0.6321523990020061,
1520
+ "eval_loss": 0.9693921208381653,
1521
+ "eval_precision": 0.6331329862312,
1522
+ "eval_recall": 0.631336405529954,
1523
+ "eval_runtime": 2.1772,
1524
+ "eval_samples_per_second": 99.669,
1525
+ "eval_steps_per_second": 6.43,
1526
+ "step": 1798
1527
+ },
1528
+ {
1529
+ "epoch": 22.01834862385321,
1530
+ "grad_norm": 8.9346342086792,
1531
+ "learning_rate": 1.4494741655235483e-05,
1532
+ "loss": 0.553,
1533
+ "step": 1800
1534
+ },
1535
+ {
1536
+ "epoch": 22.140672782874617,
1537
+ "grad_norm": 15.74846363067627,
1538
+ "learning_rate": 1.4266117969821674e-05,
1539
+ "loss": 0.5693,
1540
+ "step": 1810
1541
+ },
1542
+ {
1543
+ "epoch": 22.262996941896024,
1544
+ "grad_norm": 17.549697875976562,
1545
+ "learning_rate": 1.4037494284407866e-05,
1546
+ "loss": 0.5955,
1547
+ "step": 1820
1548
+ },
1549
+ {
1550
+ "epoch": 22.38532110091743,
1551
+ "grad_norm": 12.935697555541992,
1552
+ "learning_rate": 1.3808870598994055e-05,
1553
+ "loss": 0.5184,
1554
+ "step": 1830
1555
+ },
1556
+ {
1557
+ "epoch": 22.507645259938837,
1558
+ "grad_norm": 14.531293869018555,
1559
+ "learning_rate": 1.3580246913580247e-05,
1560
+ "loss": 0.5873,
1561
+ "step": 1840
1562
+ },
1563
+ {
1564
+ "epoch": 22.629969418960243,
1565
+ "grad_norm": 11.71330451965332,
1566
+ "learning_rate": 1.3351623228166438e-05,
1567
+ "loss": 0.538,
1568
+ "step": 1850
1569
+ },
1570
+ {
1571
+ "epoch": 22.75229357798165,
1572
+ "grad_norm": 11.269133567810059,
1573
+ "learning_rate": 1.312299954275263e-05,
1574
+ "loss": 0.5819,
1575
+ "step": 1860
1576
+ },
1577
+ {
1578
+ "epoch": 22.874617737003057,
1579
+ "grad_norm": 9.762799263000488,
1580
+ "learning_rate": 1.2894375857338819e-05,
1581
+ "loss": 0.5612,
1582
+ "step": 1870
1583
+ },
1584
+ {
1585
+ "epoch": 22.996941896024463,
1586
+ "grad_norm": 9.858174324035645,
1587
+ "learning_rate": 1.2665752171925014e-05,
1588
+ "loss": 0.556,
1589
+ "step": 1880
1590
+ },
1591
+ {
1592
+ "epoch": 22.996941896024463,
1593
+ "eval_accuracy": 0.6359447004608295,
1594
+ "eval_f1": 0.629638753332476,
1595
+ "eval_loss": 1.021164059638977,
1596
+ "eval_precision": 0.6574294916995147,
1597
+ "eval_recall": 0.6359447004608295,
1598
+ "eval_runtime": 2.1738,
1599
+ "eval_samples_per_second": 99.824,
1600
+ "eval_steps_per_second": 6.44,
1601
+ "step": 1880
1602
+ },
1603
+ {
1604
+ "epoch": 23.119266055045873,
1605
+ "grad_norm": 18.760498046875,
1606
+ "learning_rate": 1.2437128486511203e-05,
1607
+ "loss": 0.5428,
1608
+ "step": 1890
1609
+ },
1610
+ {
1611
+ "epoch": 23.24159021406728,
1612
+ "grad_norm": 12.281718254089355,
1613
+ "learning_rate": 1.2208504801097394e-05,
1614
+ "loss": 0.5557,
1615
+ "step": 1900
1616
+ },
1617
+ {
1618
+ "epoch": 23.363914373088686,
1619
+ "grad_norm": 11.702547073364258,
1620
+ "learning_rate": 1.1979881115683586e-05,
1621
+ "loss": 0.547,
1622
+ "step": 1910
1623
+ },
1624
+ {
1625
+ "epoch": 23.486238532110093,
1626
+ "grad_norm": 9.12270450592041,
1627
+ "learning_rate": 1.1751257430269777e-05,
1628
+ "loss": 0.4957,
1629
+ "step": 1920
1630
+ },
1631
+ {
1632
+ "epoch": 23.6085626911315,
1633
+ "grad_norm": 12.279609680175781,
1634
+ "learning_rate": 1.1522633744855968e-05,
1635
+ "loss": 0.5025,
1636
+ "step": 1930
1637
+ },
1638
+ {
1639
+ "epoch": 23.730886850152906,
1640
+ "grad_norm": 12.506765365600586,
1641
+ "learning_rate": 1.129401005944216e-05,
1642
+ "loss": 0.5863,
1643
+ "step": 1940
1644
+ },
1645
+ {
1646
+ "epoch": 23.853211009174313,
1647
+ "grad_norm": 13.289384841918945,
1648
+ "learning_rate": 1.106538637402835e-05,
1649
+ "loss": 0.5533,
1650
+ "step": 1950
1651
+ },
1652
+ {
1653
+ "epoch": 23.97553516819572,
1654
+ "grad_norm": 12.350972175598145,
1655
+ "learning_rate": 1.083676268861454e-05,
1656
+ "loss": 0.4871,
1657
+ "step": 1960
1658
+ },
1659
+ {
1660
+ "epoch": 24.0,
1661
+ "eval_accuracy": 0.5944700460829493,
1662
+ "eval_f1": 0.587885178704199,
1663
+ "eval_loss": 1.032782793045044,
1664
+ "eval_precision": 0.5950577616931705,
1665
+ "eval_recall": 0.5944700460829493,
1666
+ "eval_runtime": 2.183,
1667
+ "eval_samples_per_second": 99.404,
1668
+ "eval_steps_per_second": 6.413,
1669
+ "step": 1962
1670
+ },
1671
+ {
1672
+ "epoch": 24.097859327217126,
1673
+ "grad_norm": 9.447975158691406,
1674
+ "learning_rate": 1.0608139003200732e-05,
1675
+ "loss": 0.4881,
1676
+ "step": 1970
1677
+ },
1678
+ {
1679
+ "epoch": 24.220183486238533,
1680
+ "grad_norm": 11.92766284942627,
1681
+ "learning_rate": 1.0379515317786923e-05,
1682
+ "loss": 0.5066,
1683
+ "step": 1980
1684
+ },
1685
+ {
1686
+ "epoch": 24.34250764525994,
1687
+ "grad_norm": 11.550183296203613,
1688
+ "learning_rate": 1.0150891632373114e-05,
1689
+ "loss": 0.4547,
1690
+ "step": 1990
1691
+ },
1692
+ {
1693
+ "epoch": 24.464831804281346,
1694
+ "grad_norm": 10.570334434509277,
1695
+ "learning_rate": 9.922267946959306e-06,
1696
+ "loss": 0.504,
1697
+ "step": 2000
1698
+ },
1699
+ {
1700
+ "epoch": 24.587155963302752,
1701
+ "grad_norm": 12.56131362915039,
1702
+ "learning_rate": 9.693644261545497e-06,
1703
+ "loss": 0.4765,
1704
+ "step": 2010
1705
+ },
1706
+ {
1707
+ "epoch": 24.70948012232416,
1708
+ "grad_norm": 10.12960433959961,
1709
+ "learning_rate": 9.465020576131688e-06,
1710
+ "loss": 0.4862,
1711
+ "step": 2020
1712
+ },
1713
+ {
1714
+ "epoch": 24.831804281345565,
1715
+ "grad_norm": 14.820987701416016,
1716
+ "learning_rate": 9.236396890717878e-06,
1717
+ "loss": 0.4748,
1718
+ "step": 2030
1719
+ },
1720
+ {
1721
+ "epoch": 24.954128440366972,
1722
+ "grad_norm": 13.520421981811523,
1723
+ "learning_rate": 9.00777320530407e-06,
1724
+ "loss": 0.5254,
1725
+ "step": 2040
1726
+ },
1727
+ {
1728
+ "epoch": 24.990825688073393,
1729
+ "eval_accuracy": 0.5944700460829493,
1730
+ "eval_f1": 0.5917457657549822,
1731
+ "eval_loss": 1.0132023096084595,
1732
+ "eval_precision": 0.5968091048736209,
1733
+ "eval_recall": 0.5944700460829493,
1734
+ "eval_runtime": 2.1697,
1735
+ "eval_samples_per_second": 100.012,
1736
+ "eval_steps_per_second": 6.452,
1737
+ "step": 2043
1738
+ },
1739
+ {
1740
+ "epoch": 25.07645259938838,
1741
+ "grad_norm": 11.470428466796875,
1742
+ "learning_rate": 8.77914951989026e-06,
1743
+ "loss": 0.4879,
1744
+ "step": 2050
1745
+ },
1746
+ {
1747
+ "epoch": 25.198776758409785,
1748
+ "grad_norm": 12.04905891418457,
1749
+ "learning_rate": 8.550525834476454e-06,
1750
+ "loss": 0.4719,
1751
+ "step": 2060
1752
+ },
1753
+ {
1754
+ "epoch": 25.321100917431192,
1755
+ "grad_norm": 14.935491561889648,
1756
+ "learning_rate": 8.321902149062643e-06,
1757
+ "loss": 0.4989,
1758
+ "step": 2070
1759
+ },
1760
+ {
1761
+ "epoch": 25.4434250764526,
1762
+ "grad_norm": 13.884613037109375,
1763
+ "learning_rate": 8.093278463648834e-06,
1764
+ "loss": 0.4718,
1765
+ "step": 2080
1766
+ },
1767
+ {
1768
+ "epoch": 25.565749235474005,
1769
+ "grad_norm": 14.0990629196167,
1770
+ "learning_rate": 7.864654778235026e-06,
1771
+ "loss": 0.4833,
1772
+ "step": 2090
1773
+ },
1774
+ {
1775
+ "epoch": 25.68807339449541,
1776
+ "grad_norm": 15.498763084411621,
1777
+ "learning_rate": 7.636031092821217e-06,
1778
+ "loss": 0.4719,
1779
+ "step": 2100
1780
+ },
1781
+ {
1782
+ "epoch": 25.810397553516818,
1783
+ "grad_norm": 17.733688354492188,
1784
+ "learning_rate": 7.4074074074074075e-06,
1785
+ "loss": 0.5214,
1786
+ "step": 2110
1787
+ },
1788
+ {
1789
+ "epoch": 25.932721712538225,
1790
+ "grad_norm": 12.92156982421875,
1791
+ "learning_rate": 7.178783721993598e-06,
1792
+ "loss": 0.5054,
1793
+ "step": 2120
1794
+ },
1795
+ {
1796
+ "epoch": 25.99388379204893,
1797
+ "eval_accuracy": 0.5944700460829493,
1798
+ "eval_f1": 0.5943911153011756,
1799
+ "eval_loss": 1.038478136062622,
1800
+ "eval_precision": 0.5987861045709855,
1801
+ "eval_recall": 0.5944700460829493,
1802
+ "eval_runtime": 2.2069,
1803
+ "eval_samples_per_second": 98.326,
1804
+ "eval_steps_per_second": 6.344,
1805
+ "step": 2125
1806
+ },
1807
+ {
1808
+ "epoch": 26.05504587155963,
1809
+ "grad_norm": 12.785786628723145,
1810
+ "learning_rate": 6.950160036579791e-06,
1811
+ "loss": 0.5009,
1812
+ "step": 2130
1813
+ },
1814
+ {
1815
+ "epoch": 26.17737003058104,
1816
+ "grad_norm": 12.252779006958008,
1817
+ "learning_rate": 6.721536351165981e-06,
1818
+ "loss": 0.4896,
1819
+ "step": 2140
1820
+ },
1821
+ {
1822
+ "epoch": 26.299694189602448,
1823
+ "grad_norm": 8.970731735229492,
1824
+ "learning_rate": 6.492912665752173e-06,
1825
+ "loss": 0.4409,
1826
+ "step": 2150
1827
+ },
1828
+ {
1829
+ "epoch": 26.422018348623855,
1830
+ "grad_norm": 14.119239807128906,
1831
+ "learning_rate": 6.264288980338363e-06,
1832
+ "loss": 0.4685,
1833
+ "step": 2160
1834
+ },
1835
+ {
1836
+ "epoch": 26.54434250764526,
1837
+ "grad_norm": 13.817703247070312,
1838
+ "learning_rate": 6.0356652949245544e-06,
1839
+ "loss": 0.423,
1840
+ "step": 2170
1841
+ },
1842
+ {
1843
+ "epoch": 26.666666666666668,
1844
+ "grad_norm": 11.813767433166504,
1845
+ "learning_rate": 5.807041609510746e-06,
1846
+ "loss": 0.4338,
1847
+ "step": 2180
1848
+ },
1849
+ {
1850
+ "epoch": 26.788990825688074,
1851
+ "grad_norm": 10.11328411102295,
1852
+ "learning_rate": 5.578417924096937e-06,
1853
+ "loss": 0.4317,
1854
+ "step": 2190
1855
+ },
1856
+ {
1857
+ "epoch": 26.91131498470948,
1858
+ "grad_norm": 12.501155853271484,
1859
+ "learning_rate": 5.3497942386831275e-06,
1860
+ "loss": 0.4706,
1861
+ "step": 2200
1862
+ },
1863
+ {
1864
+ "epoch": 26.996941896024463,
1865
+ "eval_accuracy": 0.6036866359447005,
1866
+ "eval_f1": 0.5983142033282286,
1867
+ "eval_loss": 1.0625784397125244,
1868
+ "eval_precision": 0.6100467719653185,
1869
+ "eval_recall": 0.6036866359447005,
1870
+ "eval_runtime": 2.1672,
1871
+ "eval_samples_per_second": 100.128,
1872
+ "eval_steps_per_second": 6.46,
1873
+ "step": 2207
1874
+ },
1875
+ {
1876
+ "epoch": 27.033639143730888,
1877
+ "grad_norm": 12.064492225646973,
1878
+ "learning_rate": 5.121170553269319e-06,
1879
+ "loss": 0.4762,
1880
+ "step": 2210
1881
+ },
1882
+ {
1883
+ "epoch": 27.155963302752294,
1884
+ "grad_norm": 12.347169876098633,
1885
+ "learning_rate": 4.89254686785551e-06,
1886
+ "loss": 0.4493,
1887
+ "step": 2220
1888
+ },
1889
+ {
1890
+ "epoch": 27.2782874617737,
1891
+ "grad_norm": 12.065176010131836,
1892
+ "learning_rate": 4.663923182441701e-06,
1893
+ "loss": 0.4341,
1894
+ "step": 2230
1895
+ },
1896
+ {
1897
+ "epoch": 27.400611620795107,
1898
+ "grad_norm": 12.744647979736328,
1899
+ "learning_rate": 4.435299497027892e-06,
1900
+ "loss": 0.4373,
1901
+ "step": 2240
1902
+ },
1903
+ {
1904
+ "epoch": 27.522935779816514,
1905
+ "grad_norm": 14.091771125793457,
1906
+ "learning_rate": 4.206675811614083e-06,
1907
+ "loss": 0.4286,
1908
+ "step": 2250
1909
+ },
1910
+ {
1911
+ "epoch": 27.64525993883792,
1912
+ "grad_norm": 11.384113311767578,
1913
+ "learning_rate": 3.9780521262002744e-06,
1914
+ "loss": 0.4338,
1915
+ "step": 2260
1916
+ },
1917
+ {
1918
+ "epoch": 27.767584097859327,
1919
+ "grad_norm": 12.185340881347656,
1920
+ "learning_rate": 3.7494284407864657e-06,
1921
+ "loss": 0.4482,
1922
+ "step": 2270
1923
+ },
1924
+ {
1925
+ "epoch": 27.889908256880734,
1926
+ "grad_norm": 10.328293800354004,
1927
+ "learning_rate": 3.5208047553726566e-06,
1928
+ "loss": 0.418,
1929
+ "step": 2280
1930
+ },
1931
+ {
1932
+ "epoch": 28.0,
1933
+ "eval_accuracy": 0.5806451612903226,
1934
+ "eval_f1": 0.5774461884155286,
1935
+ "eval_loss": 1.053132176399231,
1936
+ "eval_precision": 0.5829576016453093,
1937
+ "eval_recall": 0.5806451612903226,
1938
+ "eval_runtime": 2.1931,
1939
+ "eval_samples_per_second": 98.948,
1940
+ "eval_steps_per_second": 6.384,
1941
+ "step": 2289
1942
+ },
1943
+ {
1944
+ "epoch": 28.01223241590214,
1945
+ "grad_norm": 13.196370124816895,
1946
+ "learning_rate": 3.2921810699588483e-06,
1947
+ "loss": 0.4523,
1948
+ "step": 2290
1949
+ },
1950
+ {
1951
+ "epoch": 28.134556574923547,
1952
+ "grad_norm": 12.735855102539062,
1953
+ "learning_rate": 3.063557384545039e-06,
1954
+ "loss": 0.3987,
1955
+ "step": 2300
1956
+ },
1957
+ {
1958
+ "epoch": 28.256880733944953,
1959
+ "grad_norm": 13.810506820678711,
1960
+ "learning_rate": 2.83493369913123e-06,
1961
+ "loss": 0.4244,
1962
+ "step": 2310
1963
+ },
1964
+ {
1965
+ "epoch": 28.37920489296636,
1966
+ "grad_norm": 14.41588306427002,
1967
+ "learning_rate": 2.6063100137174214e-06,
1968
+ "loss": 0.4746,
1969
+ "step": 2320
1970
+ },
1971
+ {
1972
+ "epoch": 28.501529051987767,
1973
+ "grad_norm": 13.99752426147461,
1974
+ "learning_rate": 2.3776863283036123e-06,
1975
+ "loss": 0.4344,
1976
+ "step": 2330
1977
+ },
1978
+ {
1979
+ "epoch": 28.623853211009173,
1980
+ "grad_norm": 13.782439231872559,
1981
+ "learning_rate": 2.1490626428898036e-06,
1982
+ "loss": 0.4018,
1983
+ "step": 2340
1984
+ },
1985
+ {
1986
+ "epoch": 28.74617737003058,
1987
+ "grad_norm": 10.79996109008789,
1988
+ "learning_rate": 1.920438957475995e-06,
1989
+ "loss": 0.3779,
1990
+ "step": 2350
1991
+ },
1992
+ {
1993
+ "epoch": 28.868501529051986,
1994
+ "grad_norm": 12.254060745239258,
1995
+ "learning_rate": 1.6918152720621857e-06,
1996
+ "loss": 0.4671,
1997
+ "step": 2360
1998
+ },
1999
+ {
2000
+ "epoch": 28.990825688073393,
2001
+ "grad_norm": 12.211686134338379,
2002
+ "learning_rate": 1.4631915866483768e-06,
2003
+ "loss": 0.455,
2004
+ "step": 2370
2005
+ },
2006
+ {
2007
+ "epoch": 28.990825688073393,
2008
+ "eval_accuracy": 0.6082949308755761,
2009
+ "eval_f1": 0.6039494237985392,
2010
+ "eval_loss": 1.0340049266815186,
2011
+ "eval_precision": 0.615058251488602,
2012
+ "eval_recall": 0.6082949308755761,
2013
+ "eval_runtime": 2.21,
2014
+ "eval_samples_per_second": 98.191,
2015
+ "eval_steps_per_second": 6.335,
2016
+ "step": 2370
2017
+ },
2018
+ {
2019
+ "epoch": 29.1131498470948,
2020
+ "grad_norm": 12.376998901367188,
2021
+ "learning_rate": 1.234567901234568e-06,
2022
+ "loss": 0.405,
2023
+ "step": 2380
2024
+ },
2025
+ {
2026
+ "epoch": 29.235474006116206,
2027
+ "grad_norm": 13.317730903625488,
2028
+ "learning_rate": 1.0059442158207592e-06,
2029
+ "loss": 0.3908,
2030
+ "step": 2390
2031
+ },
2032
+ {
2033
+ "epoch": 29.357798165137616,
2034
+ "grad_norm": 14.004073143005371,
2035
+ "learning_rate": 7.773205304069502e-07,
2036
+ "loss": 0.4006,
2037
+ "step": 2400
2038
+ },
2039
+ {
2040
+ "epoch": 29.480122324159023,
2041
+ "grad_norm": 11.803933143615723,
2042
+ "learning_rate": 5.486968449931413e-07,
2043
+ "loss": 0.4255,
2044
+ "step": 2410
2045
+ },
2046
+ {
2047
+ "epoch": 29.60244648318043,
2048
+ "grad_norm": 13.087397575378418,
2049
+ "learning_rate": 3.200731595793324e-07,
2050
+ "loss": 0.4354,
2051
+ "step": 2420
2052
+ },
2053
+ {
2054
+ "epoch": 29.724770642201836,
2055
+ "grad_norm": 11.753482818603516,
2056
+ "learning_rate": 9.144947416552355e-08,
2057
+ "loss": 0.4414,
2058
+ "step": 2430
2059
+ },
2060
+ {
2061
+ "epoch": 29.724770642201836,
2062
+ "eval_accuracy": 0.6129032258064516,
2063
+ "eval_f1": 0.6087354657244137,
2064
+ "eval_loss": 1.0280050039291382,
2065
+ "eval_precision": 0.6161055592910982,
2066
+ "eval_recall": 0.6129032258064516,
2067
+ "eval_runtime": 2.2362,
2068
+ "eval_samples_per_second": 97.04,
2069
+ "eval_steps_per_second": 6.261,
2070
+ "step": 2430
2071
+ },
2072
+ {
2073
+ "epoch": 29.724770642201836,
2074
+ "step": 2430,
2075
+ "total_flos": 1.2301888231484006e+19,
2076
+ "train_loss": 0.8217329954414211,
2077
+ "train_runtime": 6060.3586,
2078
+ "train_samples_per_second": 25.865,
2079
+ "train_steps_per_second": 0.401
2080
+ }
2081
+ ],
2082
+ "logging_steps": 10,
2083
+ "max_steps": 2430,
2084
+ "num_input_tokens_seen": 0,
2085
+ "num_train_epochs": 30,
2086
+ "save_steps": 500,
2087
+ "stateful_callbacks": {
2088
+ "TrainerControl": {
2089
+ "args": {
2090
+ "should_epoch_stop": false,
2091
+ "should_evaluate": false,
2092
+ "should_log": false,
2093
+ "should_save": true,
2094
+ "should_training_stop": true
2095
+ },
2096
+ "attributes": {}
2097
+ }
2098
+ },
2099
+ "total_flos": 1.2301888231484006e+19,
2100
+ "train_batch_size": 16,
2101
+ "trial_name": null,
2102
+ "trial_params": null
2103
+ }