End of training
Browse files
- .ipynb_checkpoints/config-checkpoint.json +35 -0
- README.md +115 -17
- model.safetensors +1 -1
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,35 @@
+{
+  "_name_or_path": "zera09/long_t5_4",
+  "architectures": [
+    "LongT5ForConditionalGeneration"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "encoder_attention_type": "transient-global",
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "global_block_size": 16,
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "local_radius": 127,
+  "model_type": "longt5",
+  "n_positions": 4096,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.2",
+  "use_cache": true,
+  "vocab_size": 32128
+}
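The added config describes a base-size LongT5 encoder-decoder with transient-global attention, 12 layers on each side, and 4096 positions. As a minimal sketch (not part of the commit), the checkpoint config can be instantiated directly with transformers; the relative path below assumes the repo is checked out locally:

```python
# Minimal sketch: rebuild the architecture from the committed config file.
# Assumes a transformers install and the repo root as working directory.
from transformers import LongT5Config, LongT5ForConditionalGeneration

config = LongT5Config.from_json_file(".ipynb_checkpoints/config-checkpoint.json")
print(config.encoder_attention_type, config.n_positions)  # transient-global 4096

# Weights here are randomly initialized; the trained weights live in
# model.safetensors (1187780840 bytes / 4 bytes per float32 ~ 297M params).
model = LongT5ForConditionalGeneration(config)
print(f"{sum(p.numel() for p in model.parameters()):,} parameters")
```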
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 library_name: transformers
 license: apache-2.0
-base_model: zera09/
+base_model: zera09/long_t5_4
 tags:
 - trl
 - dpo
@@ -16,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # dpo_t5_3
 
-This model is a fine-tuned version of [zera09/
+This model is a fine-tuned version of [zera09/long_t5_4](https://huggingface.co/zera09/long_t5_4) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
-- Rewards/chosen: 0.
-- Rewards/rejected: -
-- Rewards/accuracies: 0.
-- Rewards/margins:
-- Logps/rejected: -
-- Logps/chosen: -25.
-- Logits/rejected: -18.
-- Logits/chosen: -
+- Loss: 0.3279
+- Rewards/chosen: 0.4243
+- Rewards/rejected: -1.0110
+- Rewards/accuracies: 0.8625
+- Rewards/margins: 1.4353
+- Logps/rejected: -7.0019
+- Logps/chosen: -25.3685
+- Logits/rejected: -18.2655
+- Logits/chosen: -17.9202
 
 ## Model description
 
@@ -46,21 +46,119 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 5e-07
-- train_batch_size:
-- eval_batch_size:
+- train_batch_size: 8
+- eval_batch_size: 32
 - seed: 42
 - gradient_accumulation_steps: 4
-- total_train_batch_size:
+- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
-- training_steps:
+- training_steps: 1000
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.
-| 0.
+| 0.8579 | 0.05 | 10 | 0.6748 | 0.0198 | -0.0177 | 0.8469 | 0.0375 | -5.3464 | -26.0427 | -18.9879 | -18.5345 |
+| 0.7446 | 0.1 | 20 | 0.6573 | 0.0382 | -0.0362 | 0.8500 | 0.0745 | -5.3773 | -26.0120 | -18.9693 | -18.5188 |
+| 0.7655 | 0.15 | 30 | 0.6406 | 0.0560 | -0.0553 | 0.8506 | 0.1113 | -5.4091 | -25.9824 | -18.9500 | -18.5025 |
+| 0.7752 | 0.2 | 40 | 0.6255 | 0.0722 | -0.0736 | 0.8512 | 0.1458 | -5.4396 | -25.9553 | -18.9314 | -18.4868 |
+| 0.7797 | 0.25 | 50 | 0.6113 | 0.0878 | -0.0916 | 0.8506 | 0.1794 | -5.4696 | -25.9294 | -18.9138 | -18.4718 |
+| 0.7109 | 0.3 | 60 | 0.5975 | 0.1034 | -0.1102 | 0.8519 | 0.2136 | -5.5006 | -25.9034 | -18.8960 | -18.4569 |
+| 0.6863 | 0.35 | 70 | 0.5845 | 0.1186 | -0.1282 | 0.8512 | 0.2468 | -5.5307 | -25.8781 | -18.8798 | -18.4433 |
+| 0.6736 | 0.4 | 80 | 0.5720 | 0.1328 | -0.1470 | 0.8519 | 0.2798 | -5.5619 | -25.8544 | -18.8631 | -18.4291 |
+| 0.6564 | 0.45 | 90 | 0.5601 | 0.1467 | -0.1658 | 0.8519 | 0.3125 | -5.5933 | -25.8312 | -18.8455 | -18.4141 |
+| 0.705 | 0.5 | 100 | 0.5490 | 0.1599 | -0.1843 | 0.8519 | 0.3442 | -5.6241 | -25.8092 | -18.8285 | -18.3996 |
+| 0.6871 | 0.55 | 110 | 0.5380 | 0.1726 | -0.2041 | 0.8531 | 0.3768 | -5.6572 | -25.7880 | -18.8116 | -18.3852 |
+| 0.7134 | 0.6 | 120 | 0.5275 | 0.1850 | -0.2241 | 0.8550 | 0.4091 | -5.6905 | -25.7674 | -18.7943 | -18.3706 |
+| 0.6389 | 0.65 | 130 | 0.5179 | 0.1966 | -0.2433 | 0.8562 | 0.4399 | -5.7224 | -25.7480 | -18.7777 | -18.3565 |
+| 0.6128 | 0.7 | 140 | 0.5087 | 0.2081 | -0.2619 | 0.8569 | 0.4699 | -5.7534 | -25.7289 | -18.7619 | -18.3430 |
+| 0.6281 | 0.75 | 150 | 0.4996 | 0.2197 | -0.2817 | 0.8569 | 0.5014 | -5.7865 | -25.7096 | -18.7467 | -18.3302 |
+| 0.6216 | 0.8 | 160 | 0.4910 | 0.2300 | -0.3019 | 0.8569 | 0.5319 | -5.8201 | -25.6924 | -18.7314 | -18.3175 |
+| 0.6002 | 0.85 | 170 | 0.4828 | 0.2403 | -0.3216 | 0.8575 | 0.5619 | -5.8529 | -25.6752 | -18.7166 | -18.3049 |
+| 0.5649 | 0.9 | 180 | 0.4752 | 0.2501 | -0.3406 | 0.8575 | 0.5908 | -5.8847 | -25.6588 | -18.7023 | -18.2929 |
+| 0.5695 | 0.95 | 190 | 0.4680 | 0.2594 | -0.3595 | 0.8575 | 0.6189 | -5.9162 | -25.6434 | -18.6874 | -18.2802 |
+| 0.5675 | 1.0 | 200 | 0.4611 | 0.2678 | -0.3786 | 0.8575 | 0.6464 | -5.9479 | -25.6293 | -18.6724 | -18.2674 |
+| 0.5146 | 1.05 | 210 | 0.4544 | 0.2762 | -0.3980 | 0.8575 | 0.6742 | -5.9802 | -25.6154 | -18.6581 | -18.2553 |
+| 0.558 | 1.1 | 220 | 0.4482 | 0.2841 | -0.4168 | 0.8581 | 0.7009 | -6.0116 | -25.6021 | -18.6442 | -18.2437 |
+| 0.598 | 1.15 | 230 | 0.4420 | 0.2923 | -0.4358 | 0.8581 | 0.7282 | -6.0433 | -25.5885 | -18.6301 | -18.2318 |
+| 0.5918 | 1.2 | 240 | 0.4360 | 0.3001 | -0.4552 | 0.8581 | 0.7553 | -6.0755 | -25.5755 | -18.6160 | -18.2198 |
+| 0.5576 | 1.25 | 250 | 0.4307 | 0.3064 | -0.4737 | 0.8587 | 0.7802 | -6.1065 | -25.5650 | -18.6033 | -18.2090 |
+| 0.5702 | 1.3 | 260 | 0.4257 | 0.3125 | -0.4916 | 0.8587 | 0.8041 | -6.1363 | -25.5549 | -18.5910 | -18.1985 |
+| 0.5132 | 1.35 | 270 | 0.4209 | 0.3185 | -0.5090 | 0.8581 | 0.8275 | -6.1652 | -25.5449 | -18.5784 | -18.1877 |
+| 0.5752 | 1.4 | 280 | 0.4164 | 0.3240 | -0.5260 | 0.8594 | 0.8500 | -6.1936 | -25.5357 | -18.5661 | -18.1772 |
+| 0.5374 | 1.45 | 290 | 0.4123 | 0.3290 | -0.5419 | 0.8587 | 0.8709 | -6.2202 | -25.5274 | -18.5551 | -18.1678 |
+| 0.49 | 1.5 | 300 | 0.4082 | 0.3343 | -0.5579 | 0.8594 | 0.8922 | -6.2468 | -25.5185 | -18.5447 | -18.1590 |
+| 0.5269 | 1.55 | 310 | 0.4040 | 0.3398 | -0.5748 | 0.8594 | 0.9146 | -6.2749 | -25.5094 | -18.5337 | -18.1497 |
+| 0.4636 | 1.6 | 320 | 0.4001 | 0.3447 | -0.5910 | 0.8600 | 0.9357 | -6.3020 | -25.5012 | -18.5220 | -18.1396 |
+| 0.4493 | 1.65 | 330 | 0.3963 | 0.3492 | -0.6073 | 0.8594 | 0.9565 | -6.3291 | -25.4937 | -18.5108 | -18.1300 |
+| 0.5583 | 1.7 | 340 | 0.3928 | 0.3535 | -0.6228 | 0.8594 | 0.9763 | -6.3550 | -25.4865 | -18.5004 | -18.1211 |
+| 0.5091 | 1.75 | 350 | 0.3895 | 0.3577 | -0.6377 | 0.8594 | 0.9953 | -6.3798 | -25.4796 | -18.4904 | -18.1124 |
+| 0.484 | 1.8 | 360 | 0.3864 | 0.3613 | -0.6521 | 0.8600 | 1.0134 | -6.4038 | -25.4735 | -18.4815 | -18.1048 |
+| 0.434 | 1.85 | 370 | 0.3834 | 0.3650 | -0.6665 | 0.8600 | 1.0315 | -6.4278 | -25.4674 | -18.4729 | -18.0974 |
+| 0.5252 | 1.9 | 380 | 0.3805 | 0.3687 | -0.6809 | 0.8600 | 1.0496 | -6.4518 | -25.4612 | -18.4636 | -18.0894 |
+| 0.5021 | 1.95 | 390 | 0.3778 | 0.3722 | -0.6940 | 0.8606 | 1.0662 | -6.4736 | -25.4554 | -18.4550 | -18.0821 |
+| 0.5079 | 2.0 | 400 | 0.3752 | 0.3754 | -0.7071 | 0.8606 | 1.0825 | -6.4954 | -25.4500 | -18.4466 | -18.0749 |
+| 0.4553 | 2.05 | 410 | 0.3725 | 0.3788 | -0.7208 | 0.8606 | 1.0996 | -6.5184 | -25.4445 | -18.4376 | -18.0672 |
+| 0.4719 | 2.1 | 420 | 0.3700 | 0.3814 | -0.7348 | 0.8606 | 1.1162 | -6.5417 | -25.4401 | -18.4293 | -18.0602 |
+| 0.4917 | 2.15 | 430 | 0.3676 | 0.3839 | -0.7481 | 0.8612 | 1.1321 | -6.5638 | -25.4358 | -18.4212 | -18.0532 |
+| 0.4459 | 2.2 | 440 | 0.3653 | 0.3862 | -0.7614 | 0.8612 | 1.1477 | -6.5860 | -25.4320 | -18.4130 | -18.0462 |
+| 0.4596 | 2.25 | 450 | 0.3631 | 0.3888 | -0.7744 | 0.8612 | 1.1631 | -6.6075 | -25.4278 | -18.4050 | -18.0393 |
+| 0.4018 | 2.3 | 460 | 0.3610 | 0.3913 | -0.7862 | 0.8619 | 1.1775 | -6.6274 | -25.4236 | -18.3975 | -18.0328 |
+| 0.4105 | 2.35 | 470 | 0.3589 | 0.3936 | -0.7986 | 0.8619 | 1.1921 | -6.6479 | -25.4198 | -18.3902 | -18.0267 |
+| 0.4227 | 2.4 | 480 | 0.3571 | 0.3956 | -0.8097 | 0.8619 | 1.2053 | -6.6664 | -25.4164 | -18.3839 | -18.0214 |
+| 0.4584 | 2.45 | 490 | 0.3553 | 0.3975 | -0.8205 | 0.8625 | 1.2180 | -6.6844 | -25.4132 | -18.3780 | -18.0165 |
+| 0.4309 | 2.5 | 500 | 0.3537 | 0.3995 | -0.8299 | 0.8619 | 1.2295 | -6.7002 | -25.4098 | -18.3728 | -18.0121 |
+| 0.4185 | 2.55 | 510 | 0.3522 | 0.4015 | -0.8390 | 0.8625 | 1.2405 | -6.7153 | -25.4066 | -18.3675 | -18.0077 |
+| 0.4103 | 2.6 | 520 | 0.3508 | 0.4033 | -0.8480 | 0.8625 | 1.2512 | -6.7303 | -25.4036 | -18.3622 | -18.0031 |
+| 0.4511 | 2.65 | 530 | 0.3493 | 0.4047 | -0.8570 | 0.8625 | 1.2618 | -6.7454 | -25.4012 | -18.3565 | -17.9982 |
+| 0.4111 | 2.7 | 540 | 0.3479 | 0.4061 | -0.8666 | 0.8625 | 1.2728 | -6.7613 | -25.3988 | -18.3507 | -17.9932 |
+| 0.4192 | 2.75 | 550 | 0.3465 | 0.4074 | -0.8763 | 0.8619 | 1.2837 | -6.7774 | -25.3967 | -18.3451 | -17.9885 |
+| 0.4278 | 2.8 | 560 | 0.3452 | 0.4087 | -0.8848 | 0.8619 | 1.2935 | -6.7916 | -25.3945 | -18.3397 | -17.9838 |
+| 0.4001 | 2.85 | 570 | 0.3439 | 0.4102 | -0.8927 | 0.8619 | 1.3028 | -6.8048 | -25.3921 | -18.3345 | -17.9793 |
+| 0.4006 | 2.9 | 580 | 0.3428 | 0.4112 | -0.9007 | 0.8619 | 1.3119 | -6.8181 | -25.3903 | -18.3294 | -17.9749 |
+| 0.3664 | 2.95 | 590 | 0.3417 | 0.4124 | -0.9084 | 0.8619 | 1.3208 | -6.8309 | -25.3884 | -18.3246 | -17.9707 |
+| 0.4518 | 3.0 | 600 | 0.3406 | 0.4133 | -0.9159 | 0.8619 | 1.3292 | -6.8435 | -25.3869 | -18.3200 | -17.9668 |
+| 0.3931 | 3.05 | 610 | 0.3396 | 0.4140 | -0.9233 | 0.8619 | 1.3374 | -6.8558 | -25.3856 | -18.3157 | -17.9631 |
+| 0.3842 | 3.1 | 620 | 0.3386 | 0.4148 | -0.9300 | 0.8619 | 1.3448 | -6.8670 | -25.3844 | -18.3116 | -17.9596 |
+| 0.3876 | 3.15 | 630 | 0.3378 | 0.4155 | -0.9363 | 0.8612 | 1.3519 | -6.8775 | -25.3832 | -18.3081 | -17.9566 |
+| 0.4318 | 3.2 | 640 | 0.3369 | 0.4163 | -0.9423 | 0.8612 | 1.3586 | -6.8875 | -25.3819 | -18.3046 | -17.9536 |
+| 0.4309 | 3.25 | 650 | 0.3362 | 0.4169 | -0.9481 | 0.8612 | 1.3650 | -6.8971 | -25.3808 | -18.3015 | -17.9509 |
+| 0.3602 | 3.3 | 660 | 0.3354 | 0.4176 | -0.9537 | 0.8619 | 1.3712 | -6.9064 | -25.3798 | -18.2985 | -17.9484 |
+| 0.4113 | 3.35 | 670 | 0.3347 | 0.4182 | -0.9590 | 0.8619 | 1.3771 | -6.9152 | -25.3788 | -18.2955 | -17.9459 |
+| 0.3874 | 3.4 | 680 | 0.3340 | 0.4187 | -0.9641 | 0.8612 | 1.3828 | -6.9237 | -25.3778 | -18.2924 | -17.9431 |
+| 0.4358 | 3.45 | 690 | 0.3334 | 0.4192 | -0.9686 | 0.8619 | 1.3878 | -6.9312 | -25.3770 | -18.2897 | -17.9408 |
+| 0.4318 | 3.5 | 700 | 0.3329 | 0.4197 | -0.9725 | 0.8625 | 1.3923 | -6.9379 | -25.3762 | -18.2873 | -17.9388 |
+| 0.3959 | 3.55 | 710 | 0.3324 | 0.4203 | -0.9764 | 0.8625 | 1.3967 | -6.9442 | -25.3752 | -18.2849 | -17.9367 |
+| 0.4003 | 3.6 | 720 | 0.3319 | 0.4208 | -0.9802 | 0.8625 | 1.4011 | -6.9507 | -25.3744 | -18.2827 | -17.9348 |
+| 0.4106 | 3.65 | 730 | 0.3314 | 0.4212 | -0.9837 | 0.8625 | 1.4050 | -6.9565 | -25.3737 | -18.2807 | -17.9331 |
+| 0.3852 | 3.7 | 740 | 0.3310 | 0.4216 | -0.9868 | 0.8625 | 1.4084 | -6.9617 | -25.3731 | -18.2790 | -17.9317 |
+| 0.4174 | 3.75 | 750 | 0.3306 | 0.4218 | -0.9898 | 0.8625 | 1.4116 | -6.9665 | -25.3727 | -18.2774 | -17.9303 |
+| 0.4188 | 3.8 | 760 | 0.3303 | 0.4221 | -0.9922 | 0.8631 | 1.4144 | -6.9707 | -25.3722 | -18.2760 | -17.9291 |
+| 0.39 | 3.85 | 770 | 0.3300 | 0.4224 | -0.9946 | 0.8631 | 1.4170 | -6.9746 | -25.3717 | -18.2745 | -17.9278 |
+| 0.3884 | 3.9 | 780 | 0.3297 | 0.4228 | -0.9969 | 0.8631 | 1.4197 | -6.9785 | -25.3711 | -18.2732 | -17.9267 |
+| 0.4019 | 3.95 | 790 | 0.3294 | 0.4230 | -0.9991 | 0.8631 | 1.4221 | -6.9821 | -25.3707 | -18.2720 | -17.9257 |
+| 0.3742 | 4.0 | 800 | 0.3292 | 0.4232 | -1.0009 | 0.8631 | 1.4241 | -6.9852 | -25.3704 | -18.2709 | -17.9248 |
+| 0.4229 | 4.05 | 810 | 0.3289 | 0.4234 | -1.0026 | 0.8631 | 1.4259 | -6.9879 | -25.3701 | -18.2701 | -17.9240 |
+| 0.4327 | 4.1 | 820 | 0.3288 | 0.4235 | -1.0040 | 0.8631 | 1.4275 | -6.9902 | -25.3699 | -18.2693 | -17.9234 |
+| 0.4086 | 4.15 | 830 | 0.3286 | 0.4237 | -1.0052 | 0.8631 | 1.4289 | -6.9923 | -25.3696 | -18.2687 | -17.9228 |
+| 0.3724 | 4.2 | 840 | 0.3285 | 0.4238 | -1.0063 | 0.8631 | 1.4301 | -6.9941 | -25.3694 | -18.2680 | -17.9223 |
+| 0.4155 | 4.25 | 850 | 0.3283 | 0.4239 | -1.0072 | 0.8631 | 1.4311 | -6.9957 | -25.3692 | -18.2675 | -17.9219 |
+| 0.378 | 4.3 | 860 | 0.3282 | 0.4240 | -1.0081 | 0.8631 | 1.4321 | -6.9972 | -25.3691 | -18.2670 | -17.9214 |
+| 0.3837 | 4.35 | 870 | 0.3281 | 0.4240 | -1.0089 | 0.8631 | 1.4329 | -6.9984 | -25.3690 | -18.2667 | -17.9211 |
+| 0.3666 | 4.4 | 880 | 0.3281 | 0.4241 | -1.0094 | 0.8631 | 1.4335 | -6.9992 | -25.3689 | -18.2664 | -17.9209 |
+| 0.3775 | 4.45 | 890 | 0.3280 | 0.4242 | -1.0098 | 0.8625 | 1.4340 | -6.9999 | -25.3688 | -18.2662 | -17.9207 |
+| 0.401 | 4.5 | 900 | 0.3280 | 0.4242 | -1.0101 | 0.8631 | 1.4343 | -7.0004 | -25.3687 | -18.2660 | -17.9206 |
+| 0.3887 | 4.55 | 910 | 0.3279 | 0.4243 | -1.0104 | 0.8631 | 1.4346 | -7.0009 | -25.3686 | -18.2659 | -17.9205 |
+| 0.4123 | 4.6 | 920 | 0.3279 | 0.4243 | -1.0106 | 0.8625 | 1.4349 | -7.0013 | -25.3686 | -18.2657 | -17.9204 |
+| 0.415 | 4.65 | 930 | 0.3279 | 0.4243 | -1.0108 | 0.8625 | 1.4351 | -7.0016 | -25.3686 | -18.2657 | -17.9203 |
+| 0.4636 | 4.7 | 940 | 0.3279 | 0.4243 | -1.0109 | 0.8625 | 1.4352 | -7.0017 | -25.3685 | -18.2656 | -17.9202 |
+| 0.3967 | 4.75 | 950 | 0.3279 | 0.4243 | -1.0109 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2656 | -17.9202 |
+| 0.3853 | 4.8 | 960 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
+| 0.3831 | 4.85 | 970 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
+| 0.3945 | 4.9 | 980 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
+| 0.3882 | 4.95 | 990 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
+| 0.4374 | 5.0 | 1000 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
 
 
 ### Framework versions
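The hyperparameters above map almost one-to-one onto TRL's DPO configuration (effective train batch 8 × 4 = 32, cosine schedule, 1000 steps, seed 42). A hedged sketch of how such a run is typically wired up; the preference-data file, output directory, and the `processing_class` keyword (older TRL releases used `tokenizer=`) are assumptions, not taken from this commit:

```python
# Hedged sketch of a DPO run matching the card's hyperparameters.
# Assumes trl, transformers, and datasets are installed, and a preference
# dataset with "prompt"/"chosen"/"rejected" columns; file names are guesses.
from datasets import load_dataset
from transformers import AutoTokenizer, LongT5ForConditionalGeneration
from trl import DPOConfig, DPOTrainer

model = LongT5ForConditionalGeneration.from_pretrained("zera09/long_t5_4")
tokenizer = AutoTokenizer.from_pretrained("zera09/long_t5_4")
dataset = load_dataset("json", data_files="preferences.json")  # assumed path

args = DPOConfig(
    output_dir="dpo_t5_3",          # matches the card title
    learning_rate=5e-7,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=4,  # 8 * 4 = 32 total train batch size
    lr_scheduler_type="cosine",
    max_steps=1000,                 # "training_steps: 1000" above
    seed=42,
)
trainer = DPOTrainer(
    model=model,
    args=args,                      # ref_model defaults to a frozen copy
    train_dataset=dataset["train"],
    processing_class=tokenizer,
)
trainer.train()
```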
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d84aeb5d2abd8769c8bbddc3a11081d12b595cfef908c6bc1b01a0d20e426d99
 size 1187780840
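Only the `oid sha256:` line of this git-lfs pointer changed, so the commit swaps the weight blob while the byte size stays at 1187780840. A small standard-library sketch (an illustration, not part of the commit) for checking a downloaded model.safetensors against the pointer's digest:

```python
# Verify a downloaded model.safetensors against the LFS pointer's sha256.
import hashlib

EXPECTED = "d84aeb5d2abd8769c8bbddc3a11081d12b595cfef908c6bc1b01a0d20e426d99"

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "digest mismatch: file differs from commit"
print("ok:", h.hexdigest())
```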