zera09 committed · verified
Commit 58c6836 · 1 Parent(s): 83f6165

End of training

.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "_name_or_path": "zera09/long_t5_4",
+   "architectures": [
+     "LongT5ForConditionalGeneration"
+   ],
+   "d_ff": 2048,
+   "d_kv": 64,
+   "d_model": 768,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "encoder_attention_type": "transient-global",
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "global_block_size": 16,
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "local_radius": 127,
+   "model_type": "longt5",
+   "n_positions": 4096,
+   "num_decoder_layers": 12,
+   "num_heads": 12,
+   "num_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.2",
+   "use_cache": true,
+   "vocab_size": 32128
+ }
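
For reference, a minimal sketch of loading a checkpoint with this config via transformers. The Hub repo id `zera09/dpo_t5_3` is an assumption inferred from the card title below, not something this commit confirms.

```python
# Minimal sketch, not part of this commit. The repo id "zera09/dpo_t5_3"
# is an assumption inferred from the model card title below.
from transformers import AutoConfig, AutoTokenizer, LongT5ForConditionalGeneration

config = AutoConfig.from_pretrained("zera09/dpo_t5_3")
assert config.model_type == "longt5"                        # matches the JSON above
assert config.encoder_attention_type == "transient-global"  # LongT5's long-input attention variant

tokenizer = AutoTokenizer.from_pretrained("zera09/dpo_t5_3")
model = LongT5ForConditionalGeneration.from_pretrained("zera09/dpo_t5_3")

# n_positions is 4096, so inputs can be tokenized up to that length.
inputs = tokenizer("summarize: " + "some long document ...", return_tensors="pt",
                   truncation=True, max_length=4096)
summary_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```
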
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 library_name: transformers
 license: apache-2.0
- base_model: zera09/dpo_t5_2
 tags:
 - trl
 - dpo
@@ -16,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # dpo_t5_3
 
- This model is a fine-tuned version of [zera09/dpo_t5_2](https://huggingface.co/zera09/dpo_t5_2) on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.5278
- - Rewards/chosen: 0.0347
- - Rewards/rejected: -0.3816
- - Rewards/accuracies: 0.8494
- - Rewards/margins: 0.4163
- - Logps/rejected: -6.9733
- - Logps/chosen: -25.5442
- - Logits/rejected: -18.4809
- - Logits/chosen: -18.2253
 
 ## Model description
 
@@ -46,21 +46,119 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 5e-07
- - train_batch_size: 16
- - eval_batch_size: 16
 - seed: 42
 - gradient_accumulation_steps: 4
- - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
- - training_steps: 200
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.7475 | 1.0 | 100 | 0.5503 | 0.0318 | -0.3144 | 0.8494 | 0.3461 | -6.8613 | -25.5491 | -18.5139 | -18.2528 |
- | 0.6693 | 2.0 | 200 | 0.5278 | 0.0347 | -0.3816 | 0.8494 | 0.4163 | -6.9733 | -25.5442 | -18.4809 | -18.2253 |
 
 
 ### Framework versions
 
 ---
 library_name: transformers
 license: apache-2.0
+ base_model: zera09/long_t5_4
 tags:
 - trl
 - dpo
 
 
 # dpo_t5_3
 
+ This model is a fine-tuned version of [zera09/long_t5_4](https://huggingface.co/zera09/long_t5_4) on the None dataset.
 It achieves the following results on the evaluation set:
+ - Loss: 0.3279
+ - Rewards/chosen: 0.4243
+ - Rewards/rejected: -1.0110
+ - Rewards/accuracies: 0.8625
+ - Rewards/margins: 1.4353
+ - Logps/rejected: -7.0019
+ - Logps/chosen: -25.3685
+ - Logits/rejected: -18.2655
+ - Logits/chosen: -17.9202
 
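As a sanity check on these numbers (an editorial note, not part of the card): in DPO, Rewards/margins is the mean gap between the chosen and rejected rewards, which the figures above reproduce.

```python
# Hedged check: Rewards/margins = Rewards/chosen - Rewards/rejected.
chosen, rejected = 0.4243, -1.0110
print(round(chosen - rejected, 4))  # 1.4353, matching Rewards/margins above
```
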
 ## Model description
 
 
 
 The following hyperparameters were used during training:
 - learning_rate: 5e-07
+ - train_batch_size: 8
+ - eval_batch_size: 32
 - seed: 42
 - gradient_accumulation_steps: 4
+ - total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
+ - training_steps: 1000
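
A minimal sketch of a TRL DPO setup consistent with the hyperparameters above; the dataset, output path, and exact keyword names are assumptions (the TRL API has shifted across versions), not the author's actual script.

```python
# Minimal sketch under stated assumptions; not the author's actual script.
from datasets import Dataset
from transformers import AutoTokenizer, LongT5ForConditionalGeneration
from trl import DPOConfig, DPOTrainer

base = "zera09/long_t5_4"  # base model per the card header
model = LongT5ForConditionalGeneration.from_pretrained(base)
tokenizer = AutoTokenizer.from_pretrained(base)

# Placeholder preference data; the real dataset is not named in the card.
train_dataset = Dataset.from_dict({
    "prompt":   ["Summarize: example document."],
    "chosen":   ["A good summary."],
    "rejected": ["A bad summary."],
})

args = DPOConfig(
    output_dir="dpo_t5_3",            # hypothetical output path
    learning_rate=5e-7,
    per_device_train_batch_size=8,    # train_batch_size: 8
    per_device_eval_batch_size=32,    # eval_batch_size: 32
    gradient_accumulation_steps=4,    # 8 * 4 = total_train_batch_size 32
    lr_scheduler_type="cosine",
    max_steps=1000,                   # training_steps: 1000
    seed=42,
)

trainer = DPOTrainer(
    model=model,                  # ref_model omitted; TRL builds one internally
    args=args,
    train_dataset=train_dataset,
    processing_class=tokenizer,   # `tokenizer=` in older TRL versions
)
trainer.train()
```
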
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+ | 0.8579 | 0.05 | 10 | 0.6748 | 0.0198 | -0.0177 | 0.8469 | 0.0375 | -5.3464 | -26.0427 | -18.9879 | -18.5345 |
+ | 0.7446 | 0.1 | 20 | 0.6573 | 0.0382 | -0.0362 | 0.8500 | 0.0745 | -5.3773 | -26.0120 | -18.9693 | -18.5188 |
+ | 0.7655 | 0.15 | 30 | 0.6406 | 0.0560 | -0.0553 | 0.8506 | 0.1113 | -5.4091 | -25.9824 | -18.9500 | -18.5025 |
+ | 0.7752 | 0.2 | 40 | 0.6255 | 0.0722 | -0.0736 | 0.8512 | 0.1458 | -5.4396 | -25.9553 | -18.9314 | -18.4868 |
+ | 0.7797 | 0.25 | 50 | 0.6113 | 0.0878 | -0.0916 | 0.8506 | 0.1794 | -5.4696 | -25.9294 | -18.9138 | -18.4718 |
+ | 0.7109 | 0.3 | 60 | 0.5975 | 0.1034 | -0.1102 | 0.8519 | 0.2136 | -5.5006 | -25.9034 | -18.8960 | -18.4569 |
+ | 0.6863 | 0.35 | 70 | 0.5845 | 0.1186 | -0.1282 | 0.8512 | 0.2468 | -5.5307 | -25.8781 | -18.8798 | -18.4433 |
+ | 0.6736 | 0.4 | 80 | 0.5720 | 0.1328 | -0.1470 | 0.8519 | 0.2798 | -5.5619 | -25.8544 | -18.8631 | -18.4291 |
+ | 0.6564 | 0.45 | 90 | 0.5601 | 0.1467 | -0.1658 | 0.8519 | 0.3125 | -5.5933 | -25.8312 | -18.8455 | -18.4141 |
+ | 0.705 | 0.5 | 100 | 0.5490 | 0.1599 | -0.1843 | 0.8519 | 0.3442 | -5.6241 | -25.8092 | -18.8285 | -18.3996 |
+ | 0.6871 | 0.55 | 110 | 0.5380 | 0.1726 | -0.2041 | 0.8531 | 0.3768 | -5.6572 | -25.7880 | -18.8116 | -18.3852 |
+ | 0.7134 | 0.6 | 120 | 0.5275 | 0.1850 | -0.2241 | 0.8550 | 0.4091 | -5.6905 | -25.7674 | -18.7943 | -18.3706 |
+ | 0.6389 | 0.65 | 130 | 0.5179 | 0.1966 | -0.2433 | 0.8562 | 0.4399 | -5.7224 | -25.7480 | -18.7777 | -18.3565 |
+ | 0.6128 | 0.7 | 140 | 0.5087 | 0.2081 | -0.2619 | 0.8569 | 0.4699 | -5.7534 | -25.7289 | -18.7619 | -18.3430 |
+ | 0.6281 | 0.75 | 150 | 0.4996 | 0.2197 | -0.2817 | 0.8569 | 0.5014 | -5.7865 | -25.7096 | -18.7467 | -18.3302 |
+ | 0.6216 | 0.8 | 160 | 0.4910 | 0.2300 | -0.3019 | 0.8569 | 0.5319 | -5.8201 | -25.6924 | -18.7314 | -18.3175 |
+ | 0.6002 | 0.85 | 170 | 0.4828 | 0.2403 | -0.3216 | 0.8575 | 0.5619 | -5.8529 | -25.6752 | -18.7166 | -18.3049 |
+ | 0.5649 | 0.9 | 180 | 0.4752 | 0.2501 | -0.3406 | 0.8575 | 0.5908 | -5.8847 | -25.6588 | -18.7023 | -18.2929 |
+ | 0.5695 | 0.95 | 190 | 0.4680 | 0.2594 | -0.3595 | 0.8575 | 0.6189 | -5.9162 | -25.6434 | -18.6874 | -18.2802 |
+ | 0.5675 | 1.0 | 200 | 0.4611 | 0.2678 | -0.3786 | 0.8575 | 0.6464 | -5.9479 | -25.6293 | -18.6724 | -18.2674 |
+ | 0.5146 | 1.05 | 210 | 0.4544 | 0.2762 | -0.3980 | 0.8575 | 0.6742 | -5.9802 | -25.6154 | -18.6581 | -18.2553 |
+ | 0.558 | 1.1 | 220 | 0.4482 | 0.2841 | -0.4168 | 0.8581 | 0.7009 | -6.0116 | -25.6021 | -18.6442 | -18.2437 |
+ | 0.598 | 1.15 | 230 | 0.4420 | 0.2923 | -0.4358 | 0.8581 | 0.7282 | -6.0433 | -25.5885 | -18.6301 | -18.2318 |
+ | 0.5918 | 1.2 | 240 | 0.4360 | 0.3001 | -0.4552 | 0.8581 | 0.7553 | -6.0755 | -25.5755 | -18.6160 | -18.2198 |
+ | 0.5576 | 1.25 | 250 | 0.4307 | 0.3064 | -0.4737 | 0.8587 | 0.7802 | -6.1065 | -25.5650 | -18.6033 | -18.2090 |
+ | 0.5702 | 1.3 | 260 | 0.4257 | 0.3125 | -0.4916 | 0.8587 | 0.8041 | -6.1363 | -25.5549 | -18.5910 | -18.1985 |
+ | 0.5132 | 1.35 | 270 | 0.4209 | 0.3185 | -0.5090 | 0.8581 | 0.8275 | -6.1652 | -25.5449 | -18.5784 | -18.1877 |
+ | 0.5752 | 1.4 | 280 | 0.4164 | 0.3240 | -0.5260 | 0.8594 | 0.8500 | -6.1936 | -25.5357 | -18.5661 | -18.1772 |
+ | 0.5374 | 1.45 | 290 | 0.4123 | 0.3290 | -0.5419 | 0.8587 | 0.8709 | -6.2202 | -25.5274 | -18.5551 | -18.1678 |
+ | 0.49 | 1.5 | 300 | 0.4082 | 0.3343 | -0.5579 | 0.8594 | 0.8922 | -6.2468 | -25.5185 | -18.5447 | -18.1590 |
+ | 0.5269 | 1.55 | 310 | 0.4040 | 0.3398 | -0.5748 | 0.8594 | 0.9146 | -6.2749 | -25.5094 | -18.5337 | -18.1497 |
+ | 0.4636 | 1.6 | 320 | 0.4001 | 0.3447 | -0.5910 | 0.8600 | 0.9357 | -6.3020 | -25.5012 | -18.5220 | -18.1396 |
+ | 0.4493 | 1.65 | 330 | 0.3963 | 0.3492 | -0.6073 | 0.8594 | 0.9565 | -6.3291 | -25.4937 | -18.5108 | -18.1300 |
+ | 0.5583 | 1.7 | 340 | 0.3928 | 0.3535 | -0.6228 | 0.8594 | 0.9763 | -6.3550 | -25.4865 | -18.5004 | -18.1211 |
+ | 0.5091 | 1.75 | 350 | 0.3895 | 0.3577 | -0.6377 | 0.8594 | 0.9953 | -6.3798 | -25.4796 | -18.4904 | -18.1124 |
+ | 0.484 | 1.8 | 360 | 0.3864 | 0.3613 | -0.6521 | 0.8600 | 1.0134 | -6.4038 | -25.4735 | -18.4815 | -18.1048 |
+ | 0.434 | 1.85 | 370 | 0.3834 | 0.3650 | -0.6665 | 0.8600 | 1.0315 | -6.4278 | -25.4674 | -18.4729 | -18.0974 |
+ | 0.5252 | 1.9 | 380 | 0.3805 | 0.3687 | -0.6809 | 0.8600 | 1.0496 | -6.4518 | -25.4612 | -18.4636 | -18.0894 |
+ | 0.5021 | 1.95 | 390 | 0.3778 | 0.3722 | -0.6940 | 0.8606 | 1.0662 | -6.4736 | -25.4554 | -18.4550 | -18.0821 |
+ | 0.5079 | 2.0 | 400 | 0.3752 | 0.3754 | -0.7071 | 0.8606 | 1.0825 | -6.4954 | -25.4500 | -18.4466 | -18.0749 |
+ | 0.4553 | 2.05 | 410 | 0.3725 | 0.3788 | -0.7208 | 0.8606 | 1.0996 | -6.5184 | -25.4445 | -18.4376 | -18.0672 |
+ | 0.4719 | 2.1 | 420 | 0.3700 | 0.3814 | -0.7348 | 0.8606 | 1.1162 | -6.5417 | -25.4401 | -18.4293 | -18.0602 |
+ | 0.4917 | 2.15 | 430 | 0.3676 | 0.3839 | -0.7481 | 0.8612 | 1.1321 | -6.5638 | -25.4358 | -18.4212 | -18.0532 |
+ | 0.4459 | 2.2 | 440 | 0.3653 | 0.3862 | -0.7614 | 0.8612 | 1.1477 | -6.5860 | -25.4320 | -18.4130 | -18.0462 |
+ | 0.4596 | 2.25 | 450 | 0.3631 | 0.3888 | -0.7744 | 0.8612 | 1.1631 | -6.6075 | -25.4278 | -18.4050 | -18.0393 |
+ | 0.4018 | 2.3 | 460 | 0.3610 | 0.3913 | -0.7862 | 0.8619 | 1.1775 | -6.6274 | -25.4236 | -18.3975 | -18.0328 |
+ | 0.4105 | 2.35 | 470 | 0.3589 | 0.3936 | -0.7986 | 0.8619 | 1.1921 | -6.6479 | -25.4198 | -18.3902 | -18.0267 |
+ | 0.4227 | 2.4 | 480 | 0.3571 | 0.3956 | -0.8097 | 0.8619 | 1.2053 | -6.6664 | -25.4164 | -18.3839 | -18.0214 |
+ | 0.4584 | 2.45 | 490 | 0.3553 | 0.3975 | -0.8205 | 0.8625 | 1.2180 | -6.6844 | -25.4132 | -18.3780 | -18.0165 |
+ | 0.4309 | 2.5 | 500 | 0.3537 | 0.3995 | -0.8299 | 0.8619 | 1.2295 | -6.7002 | -25.4098 | -18.3728 | -18.0121 |
+ | 0.4185 | 2.55 | 510 | 0.3522 | 0.4015 | -0.8390 | 0.8625 | 1.2405 | -6.7153 | -25.4066 | -18.3675 | -18.0077 |
+ | 0.4103 | 2.6 | 520 | 0.3508 | 0.4033 | -0.8480 | 0.8625 | 1.2512 | -6.7303 | -25.4036 | -18.3622 | -18.0031 |
+ | 0.4511 | 2.65 | 530 | 0.3493 | 0.4047 | -0.8570 | 0.8625 | 1.2618 | -6.7454 | -25.4012 | -18.3565 | -17.9982 |
+ | 0.4111 | 2.7 | 540 | 0.3479 | 0.4061 | -0.8666 | 0.8625 | 1.2728 | -6.7613 | -25.3988 | -18.3507 | -17.9932 |
+ | 0.4192 | 2.75 | 550 | 0.3465 | 0.4074 | -0.8763 | 0.8619 | 1.2837 | -6.7774 | -25.3967 | -18.3451 | -17.9885 |
+ | 0.4278 | 2.8 | 560 | 0.3452 | 0.4087 | -0.8848 | 0.8619 | 1.2935 | -6.7916 | -25.3945 | -18.3397 | -17.9838 |
+ | 0.4001 | 2.85 | 570 | 0.3439 | 0.4102 | -0.8927 | 0.8619 | 1.3028 | -6.8048 | -25.3921 | -18.3345 | -17.9793 |
+ | 0.4006 | 2.9 | 580 | 0.3428 | 0.4112 | -0.9007 | 0.8619 | 1.3119 | -6.8181 | -25.3903 | -18.3294 | -17.9749 |
+ | 0.3664 | 2.95 | 590 | 0.3417 | 0.4124 | -0.9084 | 0.8619 | 1.3208 | -6.8309 | -25.3884 | -18.3246 | -17.9707 |
+ | 0.4518 | 3.0 | 600 | 0.3406 | 0.4133 | -0.9159 | 0.8619 | 1.3292 | -6.8435 | -25.3869 | -18.3200 | -17.9668 |
+ | 0.3931 | 3.05 | 610 | 0.3396 | 0.4140 | -0.9233 | 0.8619 | 1.3374 | -6.8558 | -25.3856 | -18.3157 | -17.9631 |
+ | 0.3842 | 3.1 | 620 | 0.3386 | 0.4148 | -0.9300 | 0.8619 | 1.3448 | -6.8670 | -25.3844 | -18.3116 | -17.9596 |
+ | 0.3876 | 3.15 | 630 | 0.3378 | 0.4155 | -0.9363 | 0.8612 | 1.3519 | -6.8775 | -25.3832 | -18.3081 | -17.9566 |
+ | 0.4318 | 3.2 | 640 | 0.3369 | 0.4163 | -0.9423 | 0.8612 | 1.3586 | -6.8875 | -25.3819 | -18.3046 | -17.9536 |
+ | 0.4309 | 3.25 | 650 | 0.3362 | 0.4169 | -0.9481 | 0.8612 | 1.3650 | -6.8971 | -25.3808 | -18.3015 | -17.9509 |
+ | 0.3602 | 3.3 | 660 | 0.3354 | 0.4176 | -0.9537 | 0.8619 | 1.3712 | -6.9064 | -25.3798 | -18.2985 | -17.9484 |
+ | 0.4113 | 3.35 | 670 | 0.3347 | 0.4182 | -0.9590 | 0.8619 | 1.3771 | -6.9152 | -25.3788 | -18.2955 | -17.9459 |
+ | 0.3874 | 3.4 | 680 | 0.3340 | 0.4187 | -0.9641 | 0.8612 | 1.3828 | -6.9237 | -25.3778 | -18.2924 | -17.9431 |
+ | 0.4358 | 3.45 | 690 | 0.3334 | 0.4192 | -0.9686 | 0.8619 | 1.3878 | -6.9312 | -25.3770 | -18.2897 | -17.9408 |
+ | 0.4318 | 3.5 | 700 | 0.3329 | 0.4197 | -0.9725 | 0.8625 | 1.3923 | -6.9379 | -25.3762 | -18.2873 | -17.9388 |
+ | 0.3959 | 3.55 | 710 | 0.3324 | 0.4203 | -0.9764 | 0.8625 | 1.3967 | -6.9442 | -25.3752 | -18.2849 | -17.9367 |
+ | 0.4003 | 3.6 | 720 | 0.3319 | 0.4208 | -0.9802 | 0.8625 | 1.4011 | -6.9507 | -25.3744 | -18.2827 | -17.9348 |
+ | 0.4106 | 3.65 | 730 | 0.3314 | 0.4212 | -0.9837 | 0.8625 | 1.4050 | -6.9565 | -25.3737 | -18.2807 | -17.9331 |
+ | 0.3852 | 3.7 | 740 | 0.3310 | 0.4216 | -0.9868 | 0.8625 | 1.4084 | -6.9617 | -25.3731 | -18.2790 | -17.9317 |
+ | 0.4174 | 3.75 | 750 | 0.3306 | 0.4218 | -0.9898 | 0.8625 | 1.4116 | -6.9665 | -25.3727 | -18.2774 | -17.9303 |
+ | 0.4188 | 3.8 | 760 | 0.3303 | 0.4221 | -0.9922 | 0.8631 | 1.4144 | -6.9707 | -25.3722 | -18.2760 | -17.9291 |
+ | 0.39 | 3.85 | 770 | 0.3300 | 0.4224 | -0.9946 | 0.8631 | 1.4170 | -6.9746 | -25.3717 | -18.2745 | -17.9278 |
+ | 0.3884 | 3.9 | 780 | 0.3297 | 0.4228 | -0.9969 | 0.8631 | 1.4197 | -6.9785 | -25.3711 | -18.2732 | -17.9267 |
+ | 0.4019 | 3.95 | 790 | 0.3294 | 0.4230 | -0.9991 | 0.8631 | 1.4221 | -6.9821 | -25.3707 | -18.2720 | -17.9257 |
+ | 0.3742 | 4.0 | 800 | 0.3292 | 0.4232 | -1.0009 | 0.8631 | 1.4241 | -6.9852 | -25.3704 | -18.2709 | -17.9248 |
+ | 0.4229 | 4.05 | 810 | 0.3289 | 0.4234 | -1.0026 | 0.8631 | 1.4259 | -6.9879 | -25.3701 | -18.2701 | -17.9240 |
+ | 0.4327 | 4.1 | 820 | 0.3288 | 0.4235 | -1.0040 | 0.8631 | 1.4275 | -6.9902 | -25.3699 | -18.2693 | -17.9234 |
+ | 0.4086 | 4.15 | 830 | 0.3286 | 0.4237 | -1.0052 | 0.8631 | 1.4289 | -6.9923 | -25.3696 | -18.2687 | -17.9228 |
+ | 0.3724 | 4.2 | 840 | 0.3285 | 0.4238 | -1.0063 | 0.8631 | 1.4301 | -6.9941 | -25.3694 | -18.2680 | -17.9223 |
+ | 0.4155 | 4.25 | 850 | 0.3283 | 0.4239 | -1.0072 | 0.8631 | 1.4311 | -6.9957 | -25.3692 | -18.2675 | -17.9219 |
+ | 0.378 | 4.3 | 860 | 0.3282 | 0.4240 | -1.0081 | 0.8631 | 1.4321 | -6.9972 | -25.3691 | -18.2670 | -17.9214 |
+ | 0.3837 | 4.35 | 870 | 0.3281 | 0.4240 | -1.0089 | 0.8631 | 1.4329 | -6.9984 | -25.3690 | -18.2667 | -17.9211 |
+ | 0.3666 | 4.4 | 880 | 0.3281 | 0.4241 | -1.0094 | 0.8631 | 1.4335 | -6.9992 | -25.3689 | -18.2664 | -17.9209 |
+ | 0.3775 | 4.45 | 890 | 0.3280 | 0.4242 | -1.0098 | 0.8625 | 1.4340 | -6.9999 | -25.3688 | -18.2662 | -17.9207 |
+ | 0.401 | 4.5 | 900 | 0.3280 | 0.4242 | -1.0101 | 0.8631 | 1.4343 | -7.0004 | -25.3687 | -18.2660 | -17.9206 |
+ | 0.3887 | 4.55 | 910 | 0.3279 | 0.4243 | -1.0104 | 0.8631 | 1.4346 | -7.0009 | -25.3686 | -18.2659 | -17.9205 |
+ | 0.4123 | 4.6 | 920 | 0.3279 | 0.4243 | -1.0106 | 0.8625 | 1.4349 | -7.0013 | -25.3686 | -18.2657 | -17.9204 |
+ | 0.415 | 4.65 | 930 | 0.3279 | 0.4243 | -1.0108 | 0.8625 | 1.4351 | -7.0016 | -25.3686 | -18.2657 | -17.9203 |
+ | 0.4636 | 4.7 | 940 | 0.3279 | 0.4243 | -1.0109 | 0.8625 | 1.4352 | -7.0017 | -25.3685 | -18.2656 | -17.9202 |
+ | 0.3967 | 4.75 | 950 | 0.3279 | 0.4243 | -1.0109 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2656 | -17.9202 |
+ | 0.3853 | 4.8 | 960 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
+ | 0.3831 | 4.85 | 970 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
+ | 0.3945 | 4.9 | 980 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
+ | 0.3882 | 4.95 | 990 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
+ | 0.4374 | 5.0 | 1000 | 0.3279 | 0.4243 | -1.0110 | 0.8625 | 1.4353 | -7.0019 | -25.3685 | -18.2655 | -17.9202 |
 
 
 ### Framework versions
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:53b701fae59f0861bd2d795498a57033b33d2dc0cd703489bfb423d091ccd00d
+ oid sha256:d84aeb5d2abd8769c8bbddc3a11081d12b595cfef908c6bc1b01a0d20e426d99
 size 1187780840
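
The pointer swap above only changes the sha256 (the size is unchanged), so a quick integrity check is possible after download. A minimal sketch; the local filename is a hypothetical placeholder.

```python
# Minimal sketch: verify a downloaded model.safetensors against the new
# LFS pointer's sha256. The local path is a hypothetical placeholder.
import hashlib

EXPECTED = "d84aeb5d2abd8769c8bbddc3a11081d12b595cfef908c6bc1b01a0d20e426d99"

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    # Stream in 1 MiB chunks to avoid loading the ~1.19 GB file at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checksum mismatch"
print("ok: file matches the LFS pointer (expected size 1187780840 bytes)")
```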