Model save
Browse files
- README.md +79 -83
- adapter_config.json +5 -5
- all_results.json +7 -7
- eval_results.json +3 -3
- runs/Jan25_08-45-35_897544936d51/events.out.tfevents.1706172432.897544936d51.14729.0 +3 -0
- runs/Jan25_08-45-35_897544936d51/events.out.tfevents.1706173974.897544936d51.14729.1 +3 -0
- train_results.json +4 -4
- trainer_state.json +4 -4
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,13 +2,9 @@
|
|
2 |
license: apache-2.0
|
3 |
library_name: peft
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
-
- generated_from_trainer
|
7 |
- trl
|
8 |
- dpo
|
9 |
- generated_from_trainer
|
10 |
-
datasets:
|
11 |
-
- HuggingFaceH4/ultrafeedback_binarized
|
12 |
base_model: mistralai/Mistral-7B-v0.1
|
13 |
model-index:
|
14 |
- name: zephyr-7b-dpo-qlora
|
@@ -20,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
20 |
|
21 |
# zephyr-7b-dpo-qlora
|
22 |
|
23 |
-
This model is a fine-tuned version of [
|
24 |
It achieves the following results on the evaluation set:
|
25 |
- Loss: 0.5058
|
26 |
- Rewards/chosen: -2.0144
|
@@ -63,84 +59,84 @@ The following hyperparameters were used during training:
|
|
63 |
|
64 |
### Training results
|
65 |
|
66 |
-
| Training Loss | Epoch | Step |
|
67 |
-
|
68 |
-
| 0.6934 | 0.01 | 100 |
|
69 |
-
| 0.6924 | 0.03 | 200 |
|
70 |
-
| 0.691 | 0.04 | 300 |
|
71 |
-
| 0.6876 | 0.05 | 400 |
|
72 |
-
| 0.6799 | 0.07 | 500 |
|
73 |
-
| 0.6577 | 0.08 | 600 |
|
74 |
-
| 0.6365 | 0.09 | 700 |
|
75 |
-
| 0.6519 | 0.1 | 800 |
|
76 |
-
| 0.6547 | 0.12 | 900 |
|
77 |
-
| 0.5841 | 0.13 | 1000 |
|
78 |
-
| 0.5877 | 0.14 | 1100 |
|
79 |
-
| 0.5552 | 0.16 | 1200 | 0.
|
80 |
-
| 0.5492 | 0.17 | 1300 | 0.
|
81 |
-
| 0.5789 | 0.18 | 1400 | 0.
|
82 |
-
| 0.5456 | 0.2 | 1500 | 0.
|
83 |
-
| 0.4722 | 0.21 | 1600 | 0.
|
84 |
-
| 0.5072 | 0.22 | 1700 | 0.
|
85 |
-
| 0.5284 | 0.24 | 1800 | 0.
|
86 |
-
| 0.5623 | 0.25 | 1900 | 0.
|
87 |
-
| 0.6092 | 0.26 | 2000 | 0.
|
88 |
-
| 0.5726 | 0.27 | 2100 | 0.
|
89 |
-
| 0.5323 | 0.29 | 2200 | 0.
|
90 |
-
| 0.5148 | 0.3 | 2300 | 0.
|
91 |
-
| 0.4112 | 0.31 | 2400 | 0.
|
92 |
-
| 0.5246 | 0.33 | 2500 |
|
93 |
-
| 0.5657 | 0.34 | 2600 | 0.
|
94 |
-
| 0.5216 | 0.35 | 2700 |
|
95 |
-
| 0.5865 | 0.37 | 2800 |
|
96 |
-
| 0.5252 | 0.38 | 2900 | 0.
|
97 |
-
| 0.5381 | 0.39 | 3000 |
|
98 |
-
| 0.4587 | 0.41 | 3100 |
|
99 |
-
| 0.5173 | 0.42 | 3200 | 0.
|
100 |
-
| 0.5851 | 0.43 | 3300 |
|
101 |
-
| 0.5811 | 0.44 | 3400 |
|
102 |
-
| 0.4892 | 0.46 | 3500 |
|
103 |
-
| 0.5438 | 0.47 | 3600 |
|
104 |
-
| 0.5677 | 0.48 | 3700 |
|
105 |
-
| 0.5471 | 0.5 | 3800 |
|
106 |
-
| 0.5193 | 0.51 | 3900 |
|
107 |
-
| 0.5764 | 0.52 | 4000 |
|
108 |
-
| 0.504 | 0.54 | 4100 |
|
109 |
-
| 0.4846 | 0.55 | 4200 |
|
110 |
-
| 0.426 | 0.56 | 4300 |
|
111 |
-
| 0.5289 | 0.58 | 4400 |
|
112 |
-
| 0.4542 | 0.59 | 4500 |
|
113 |
-
| 0.4839 | 0.6 | 4600 |
|
114 |
-
| 0.5562 | 0.62 | 4700 |
|
115 |
-
| 0.4964 | 0.63 | 4800 |
|
116 |
-
| 0.4879 | 0.64 | 4900 |
|
117 |
-
| 0.4916 | 0.65 | 5000 |
|
118 |
-
| 0.5806 | 0.67 | 5100 |
|
119 |
-
| 0.5027 | 0.68 | 5200 |
|
120 |
-
| 0.4554 | 0.69 | 5300 |
|
121 |
-
| 0.4521 | 0.71 | 5400 |
|
122 |
-
| 0.5869 | 0.72 | 5500 |
|
123 |
-
| 0.5924 | 0.73 | 5600 | 0.
|
124 |
-
| 0.4275 | 0.75 | 5700 | 0.
|
125 |
-
| 0.4746 | 0.76 | 5800 |
|
126 |
-
| 0.5033 | 0.77 | 5900 | 0.
|
127 |
-
| 0.4517 | 0.79 | 6000 | 0.
|
128 |
-
| 0.5071 | 0.8 | 6100 | 0.
|
129 |
-
| 0.6455 | 0.81 | 6200 | 0.
|
130 |
-
| 0.4796 | 0.82 | 6300 |
|
131 |
-
| 0.5568 | 0.84 | 6400 |
|
132 |
-
| 0.4335 | 0.85 | 6500 |
|
133 |
-
| 0.5263 | 0.86 | 6600 |
|
134 |
-
| 0.5263 | 0.88 | 6700 |
|
135 |
-
| 0.4939 | 0.89 | 6800 |
|
136 |
-
| 0.5763 | 0.9 | 6900 |
|
137 |
-
| 0.5062 | 0.92 | 7000 |
|
138 |
-
| 0.4432 | 0.93 | 7100 |
|
139 |
-
| 0.5294 | 0.94 | 7200 |
|
140 |
-
| 0.4488 | 0.96 | 7300 |
|
141 |
-
| 0.4747 | 0.97 | 7400 |
|
142 |
-
| 0.4494 | 0.98 | 7500 |
|
143 |
-
| 0.5319 | 0.99 | 7600 |
|
144 |
|
145 |
|
146 |
### Framework versions
|
|
|
2 |
license: apache-2.0
|
3 |
library_name: peft
|
4 |
tags:
|
|
|
|
|
5 |
- trl
|
6 |
- dpo
|
7 |
- generated_from_trainer
|
|
|
|
|
8 |
base_model: mistralai/Mistral-7B-v0.1
|
9 |
model-index:
|
10 |
- name: zephyr-7b-dpo-qlora
|
|
|
16 |
|
17 |
# zephyr-7b-dpo-qlora
|
18 |
|
19 |
+
This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrafeedback_binarized dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
- Loss: 0.5058
|
22 |
- Rewards/chosen: -2.0144
|
|
|
59 |
|
60 |
### Training results
|
61 |
|
62 |
+
| Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
|
63 |
+
|:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
|
64 |
+
| 0.6934 | 0.01 | 100 | -2.5261 | -2.4383 | -268.4692 | -248.5731 | 0.6931 | 0.5105 | 0.0002 | 0.0001 | 0.0001 |
|
65 |
+
| 0.6924 | 0.03 | 200 | -2.5247 | -2.4368 | -268.3451 | -248.5511 | 0.6926 | 0.5605 | 0.0014 | 0.0011 | 0.0003 |
|
66 |
+
| 0.691 | 0.04 | 300 | -2.5253 | -2.4378 | -267.5839 | -248.1753 | 0.6907 | 0.6440 | 0.0091 | 0.0050 | 0.0041 |
|
67 |
+
| 0.6876 | 0.05 | 400 | -2.5230 | -2.4351 | -264.4353 | -246.3089 | 0.6845 | 0.6580 | 0.0405 | 0.0178 | 0.0227 |
|
68 |
+
| 0.6799 | 0.07 | 500 | -2.4660 | -2.3755 | -264.9495 | -249.9276 | 0.6707 | 0.6815 | 0.0354 | 0.0489 | -0.0135 |
|
69 |
+
| 0.6577 | 0.08 | 600 | -2.3601 | -2.2541 | -280.7885 | -272.3604 | 0.6462 | 0.6750 | -0.1230 | 0.1148 | -0.2378 |
|
70 |
+
| 0.6365 | 0.09 | 700 | -2.3136 | -2.2013 | -277.0453 | -272.2037 | 0.6345 | 0.6860 | -0.0856 | 0.1507 | -0.2362 |
|
71 |
+
| 0.6519 | 0.1 | 800 | -2.1835 | -2.0482 | -317.9223 | -320.8872 | 0.6240 | 0.6630 | -0.4943 | 0.2287 | -0.7231 |
|
72 |
+
| 0.6547 | 0.12 | 900 | -2.2184 | -2.0783 | -325.8177 | -331.4542 | 0.6203 | 0.6695 | -0.5733 | 0.2555 | -0.8287 |
|
73 |
+
| 0.5841 | 0.13 | 1000 | -2.2086 | -2.0689 | -322.0998 | -334.5816 | 0.6071 | 0.6820 | -0.5361 | 0.3239 | -0.8600 |
|
74 |
+
| 0.5877 | 0.14 | 1100 | -1.3836 | -1.1053 | -383.4380 | -410.8678 | 0.5947 | 0.6855 | -1.1495 | 0.4734 | -1.6229 |
|
75 |
+
| 0.5552 | 0.16 | 1200 | -0.7372 | -0.3614 | -411.0459 | -437.9200 | 0.5909 | 0.6880 | -1.4256 | 0.4678 | -1.8934 |
|
76 |
+
| 0.5492 | 0.17 | 1300 | -0.5949 | -0.1933 | -414.6323 | -446.2910 | 0.5791 | 0.6935 | -1.4614 | 0.5157 | -1.9771 |
|
77 |
+
| 0.5789 | 0.18 | 1400 | -0.5846 | -0.1908 | -356.4832 | -384.9109 | 0.5771 | 0.7035 | -0.8799 | 0.4834 | -1.3633 |
|
78 |
+
| 0.5456 | 0.2 | 1500 | -0.1574 | 0.3098 | -386.9436 | -427.7158 | 0.5646 | 0.7035 | -1.1845 | 0.6068 | -1.7913 |
|
79 |
+
| 0.4722 | 0.21 | 1600 | 0.0346 | 0.5395 | -400.9113 | -442.8174 | 0.5598 | 0.7075 | -1.3242 | 0.6181 | -1.9424 |
|
80 |
+
| 0.5072 | 0.22 | 1700 | 0.4657 | 1.0411 | -418.8860 | -465.2537 | 0.5574 | 0.7060 | -1.5040 | 0.6628 | -2.1667 |
|
81 |
+
| 0.5284 | 0.24 | 1800 | 0.6528 | 1.2404 | -423.3542 | -469.1293 | 0.5534 | 0.7070 | -1.5486 | 0.6568 | -2.2055 |
|
82 |
+
| 0.5623 | 0.25 | 1900 | 0.3058 | 0.7808 | -439.5539 | -491.0526 | 0.5625 | 0.7055 | -1.7106 | 0.7141 | -2.4247 |
|
83 |
+
| 0.6092 | 0.26 | 2000 | 0.0079 | 0.5199 | -370.0728 | -413.7089 | 0.5501 | 0.7085 | -1.0158 | 0.6354 | -1.6513 |
|
84 |
+
| 0.5726 | 0.27 | 2100 | 0.4405 | 0.9981 | -415.4569 | -464.3842 | 0.5433 | 0.7150 | -1.4697 | 0.6884 | -2.1580 |
|
85 |
+
| 0.5323 | 0.29 | 2200 | 0.7445 | 1.3533 | -400.2244 | -457.4451 | 0.5483 | 0.7150 | -1.3173 | 0.7713 | -2.0886 |
|
86 |
+
| 0.5148 | 0.3 | 2300 | 0.5107 | 1.1454 | -400.4308 | -450.4646 | 0.5387 | 0.7275 | -1.3194 | 0.6994 | -2.0188 |
|
87 |
+
| 0.4112 | 0.31 | 2400 | 0.6648 | 1.2866 | -430.5040 | -490.7723 | 0.5401 | 0.7200 | -1.6201 | 0.8018 | -2.4219 |
|
88 |
+
| 0.5246 | 0.33 | 2500 | 1.0914 | 1.7388 | -481.2729 | -538.2222 | 0.5413 | 0.7220 | -2.1278 | 0.7686 | -2.8964 |
|
89 |
+
| 0.5657 | 0.34 | 2600 | 0.9886 | 1.6571 | -437.1172 | -495.0003 | 0.5373 | 0.7200 | -1.6863 | 0.7779 | -2.4642 |
|
90 |
+
| 0.5216 | 0.35 | 2700 | 1.1290 | 1.7936 | -467.4365 | -522.5278 | 0.5357 | 0.7260 | -1.9895 | 0.7500 | -2.7395 |
|
91 |
+
| 0.5865 | 0.37 | 2800 | 1.1019 | 1.7565 | -478.5605 | -529.6149 | 0.5351 | 0.7260 | -2.1007 | 0.7096 | -2.8103 |
|
92 |
+
| 0.5252 | 0.38 | 2900 | 0.9108 | 1.5686 | -426.6496 | -492.7397 | 0.5376 | 0.7205 | -1.5816 | 0.8600 | -2.4416 |
|
93 |
+
| 0.5381 | 0.39 | 3000 | 1.0233 | 1.7206 | -422.6485 | -485.7741 | 0.5306 | 0.7230 | -1.5416 | 0.8303 | -2.3719 |
|
94 |
+
| 0.4587 | 0.41 | 3100 | 1.1221 | 1.8445 | -413.6005 | -467.0778 | 0.5222 | 0.7260 | -1.4511 | 0.7339 | -2.1850 |
|
95 |
+
| 0.5173 | 0.42 | 3200 | 0.8981 | 1.6186 | -403.9989 | -462.4095 | 0.5277 | 0.7260 | -1.3551 | 0.7832 | -2.1383 |
|
96 |
+
| 0.5851 | 0.43 | 3300 | 1.2860 | 2.0344 | -437.1258 | -498.6931 | 0.5181 | 0.7325 | -1.6864 | 0.8148 | -2.5011 |
|
97 |
+
| 0.5811 | 0.44 | 3400 | 1.0162 | 1.7238 | -428.5590 | -492.4408 | 0.5166 | 0.7335 | -1.6007 | 0.8379 | -2.4386 |
|
98 |
+
| 0.4892 | 0.46 | 3500 | 1.3014 | 2.0709 | -415.6104 | -480.9519 | 0.5257 | 0.7280 | -1.4712 | 0.8525 | -2.3237 |
|
99 |
+
| 0.5438 | 0.47 | 3600 | 1.4150 | 2.2020 | -428.1592 | -493.0664 | 0.5252 | 0.7275 | -1.5967 | 0.8482 | -2.4449 |
|
100 |
+
| 0.5677 | 0.48 | 3700 | 1.6843 | 2.4678 | -465.7504 | -529.8630 | 0.5152 | 0.7275 | -1.9726 | 0.8402 | -2.8128 |
|
101 |
+
| 0.5471 | 0.5 | 3800 | 1.4352 | 2.2022 | -475.7978 | -551.5833 | 0.5240 | 0.7255 | -2.0731 | 0.9569 | -3.0300 |
|
102 |
+
| 0.5193 | 0.51 | 3900 | 1.3990 | 2.1469 | -485.6194 | -559.7596 | 0.5185 | 0.7340 | -2.1713 | 0.9405 | -3.1118 |
|
103 |
+
| 0.5764 | 0.52 | 4000 | 1.1192 | 1.8653 | -469.0576 | -545.9298 | 0.5177 | 0.7310 | -2.0057 | 0.9678 | -2.9735 |
|
104 |
+
| 0.504 | 0.54 | 4100 | 1.0344 | 1.7948 | -450.8565 | -523.1135 | 0.5180 | 0.7270 | -1.8237 | 0.9217 | -2.7453 |
|
105 |
+
| 0.4846 | 0.55 | 4200 | 1.3329 | 2.1064 | -480.6317 | -553.0635 | 0.5168 | 0.7260 | -2.1214 | 0.9234 | -3.0448 |
|
106 |
+
| 0.426 | 0.56 | 4300 | 1.2900 | 2.0377 | -469.9074 | -543.4855 | 0.5096 | 0.7325 | -2.0142 | 0.9349 | -2.9490 |
|
107 |
+
| 0.5289 | 0.58 | 4400 | 1.0286 | 1.7669 | -464.7332 | -542.2659 | 0.5143 | 0.7260 | -1.9624 | 0.9744 | -2.9368 |
|
108 |
+
| 0.4542 | 0.59 | 4500 | 1.1395 | 1.8775 | -464.9223 | -541.3861 | 0.5102 | 0.7335 | -1.9643 | 0.9637 | -2.9280 |
|
109 |
+
| 0.4839 | 0.6 | 4600 | 1.1472 | 1.8858 | -468.8564 | -546.4150 | 0.5094 | 0.7305 | -2.0037 | 0.9747 | -2.9783 |
|
110 |
+
| 0.5562 | 0.62 | 4700 | 1.1999 | 1.9384 | -471.0873 | -546.7677 | 0.5076 | 0.7340 | -2.0260 | 0.9559 | -2.9819 |
|
111 |
+
| 0.4964 | 0.63 | 4800 | 1.3968 | 2.1538 | -485.7305 | -561.4290 | 0.5078 | 0.7335 | -2.1724 | 0.9561 | -3.1285 |
|
112 |
+
| 0.4879 | 0.64 | 4900 | 1.3802 | 2.1324 | -489.5623 | -571.5599 | 0.5125 | 0.7310 | -2.2107 | 1.0191 | -3.2298 |
|
113 |
+
| 0.4916 | 0.65 | 5000 | 1.3780 | 2.1161 | -478.1451 | -558.6430 | 0.5087 | 0.7300 | -2.0966 | 1.0041 | -3.1006 |
|
114 |
+
| 0.5806 | 0.67 | 5100 | 1.3595 | 2.0897 | -491.2838 | -572.3604 | 0.5089 | 0.7305 | -2.2279 | 1.0099 | -3.2378 |
|
115 |
+
| 0.5027 | 0.68 | 5200 | 1.0714 | 1.8014 | -458.1095 | -531.8434 | 0.5038 | 0.7375 | -1.8962 | 0.9364 | -2.8326 |
|
116 |
+
| 0.4554 | 0.69 | 5300 | 1.1555 | 1.8905 | -463.9870 | -540.6600 | 0.5052 | 0.7330 | -1.9550 | 0.9658 | -2.9208 |
|
117 |
+
| 0.4521 | 0.71 | 5400 | 1.1076 | 1.8437 | -467.6124 | -543.2982 | 0.5039 | 0.7370 | -1.9912 | 0.9559 | -2.9472 |
|
118 |
+
| 0.5869 | 0.72 | 5500 | 1.1574 | 1.8865 | -485.5281 | -564.9521 | 0.5054 | 0.7360 | -2.1704 | 0.9933 | -3.1637 |
|
119 |
+
| 0.5924 | 0.73 | 5600 | 0.8215 | 1.5325 | -450.2935 | -527.0139 | 0.5064 | 0.7320 | -1.8180 | 0.9663 | -2.7843 |
|
120 |
+
| 0.4275 | 0.75 | 5700 | 0.9960 | 1.7229 | -469.1932 | -549.8819 | 0.5055 | 0.7340 | -2.0070 | 1.0060 | -3.0130 |
|
121 |
+
| 0.4746 | 0.76 | 5800 | 1.1168 | 1.8507 | -489.1825 | -573.2806 | 0.5072 | 0.7300 | -2.2069 | 1.0401 | -3.2470 |
|
122 |
+
| 0.5033 | 0.77 | 5900 | 0.9675 | 1.7071 | -458.1062 | -536.0162 | 0.5061 | 0.7275 | -1.8962 | 0.9782 | -2.8744 |
|
123 |
+
| 0.4517 | 0.79 | 6000 | 0.8156 | 1.5613 | -441.7279 | -516.7132 | 0.5105 | 0.7265 | -1.7324 | 0.9489 | -2.6813 |
|
124 |
+
| 0.5071 | 0.8 | 6100 | 0.9370 | 1.6895 | -454.8272 | -534.7506 | 0.5116 | 0.7275 | -1.8634 | 0.9983 | -2.8617 |
|
125 |
+
| 0.6455 | 0.81 | 6200 | 0.9542 | 1.7120 | -456.4508 | -536.0126 | 0.5110 | 0.7250 | -1.8796 | 0.9947 | -2.8743 |
|
126 |
+
| 0.4796 | 0.82 | 6300 | 1.0203 | 1.7784 | -460.9879 | -543.0519 | 0.5112 | 0.7260 | -1.9250 | 1.0197 | -2.9447 |
|
127 |
+
| 0.5568 | 0.84 | 6400 | 1.1152 | 1.8764 | -463.8810 | -545.5328 | 0.5086 | 0.7275 | -1.9539 | 1.0156 | -2.9695 |
|
128 |
+
| 0.4335 | 0.85 | 6500 | 1.1822 | 1.9425 | -468.9681 | -550.4982 | 0.5067 | 0.7295 | -2.0048 | 1.0144 | -3.0192 |
|
129 |
+
| 0.5263 | 0.86 | 6600 | 1.1806 | 1.9390 | -465.3099 | -546.2759 | 0.5066 | 0.7310 | -1.9682 | 1.0087 | -2.9769 |
|
130 |
+
| 0.5263 | 0.88 | 6700 | 1.1794 | 1.9366 | -465.6784 | -546.6119 | 0.5066 | 0.7320 | -1.9719 | 1.0084 | -2.9803 |
|
131 |
+
| 0.4939 | 0.89 | 6800 | 1.2238 | 1.9795 | -470.5374 | -551.8629 | 0.5063 | 0.7325 | -2.0205 | 1.0123 | -3.0328 |
|
132 |
+
| 0.5763 | 0.9 | 6900 | 1.2027 | 1.9579 | -469.4713 | -550.4863 | 0.5060 | 0.7330 | -2.0098 | 1.0092 | -3.0191 |
|
133 |
+
| 0.5062 | 0.92 | 7000 | 1.2018 | 1.9574 | -468.7946 | -549.6514 | 0.5059 | 0.7320 | -2.0030 | 1.0077 | -3.0107 |
|
134 |
+
| 0.4432 | 0.93 | 7100 | 1.2115 | 1.9675 | -469.8141 | -550.7594 | 0.5059 | 0.7330 | -2.0132 | 1.0085 | -3.0218 |
|
135 |
+
| 0.5294 | 0.94 | 7200 | 1.2123 | 1.9679 | -469.9014 | -550.8820 | 0.5059 | 0.7315 | -2.0141 | 1.0089 | -3.0230 |
|
136 |
+
| 0.4488 | 0.96 | 7300 | 1.2130 | 1.9688 | -469.9289 | -550.9682 | 0.5058 | 0.7320 | -2.0144 | 1.0095 | -3.0239 |
|
137 |
+
| 0.4747 | 0.97 | 7400 | 1.2122 | 1.9679 | -469.9052 | -550.9178 | 0.5057 | 0.7325 | -2.0142 | 1.0092 | -3.0234 |
|
138 |
+
| 0.4494 | 0.98 | 7500 | 1.2121 | 1.9679 | -469.9345 | -550.9584 | 0.5058 | 0.7350 | -2.0144 | 1.0093 | -3.0238 |
|
139 |
+
| 0.5319 | 0.99 | 7600 | 1.2121 | 1.9679 | -469.9345 | -550.9584 | 0.5058 | 0.7350 | -2.0144 | 1.0093 | -3.0238 |
|
140 |
|
141 |
|
142 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -19,13 +19,13 @@
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
-
"q_proj",
|
23 |
-
"up_proj",
|
24 |
-
"o_proj",
|
25 |
-
"k_proj",
|
26 |
"v_proj",
|
27 |
"gate_proj",
|
28 |
-
"
|
|
|
|
|
|
|
|
|
29 |
],
|
30 |
"task_type": "CAUSAL_LM"
|
31 |
}
|
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
|
|
|
|
|
|
|
|
22 |
"v_proj",
|
23 |
"gate_proj",
|
24 |
+
"o_proj",
|
25 |
+
"k_proj",
|
26 |
+
"up_proj",
|
27 |
+
"down_proj",
|
28 |
+
"q_proj"
|
29 |
],
|
30 |
"task_type": "CAUSAL_LM"
|
31 |
}
|
all_results.json
CHANGED
@@ -9,13 +9,13 @@
|
|
9 |
"eval_rewards/chosen": -2.0144448280334473,
|
10 |
"eval_rewards/margins": 1.0093281269073486,
|
11 |
"eval_rewards/rejected": -3.023772954940796,
|
12 |
-
"eval_runtime":
|
13 |
"eval_samples": 2000,
|
14 |
-
"eval_samples_per_second": 1.
|
15 |
-
"eval_steps_per_second": 0.
|
16 |
-
"train_loss": 0.
|
17 |
-
"train_runtime":
|
18 |
"train_samples": 61135,
|
19 |
-
"train_samples_per_second":
|
20 |
-
"train_steps_per_second":
|
21 |
}
|
|
|
9 |
"eval_rewards/chosen": -2.0144448280334473,
|
10 |
"eval_rewards/margins": 1.0093281269073486,
|
11 |
"eval_rewards/rejected": -3.023772954940796,
|
12 |
+
"eval_runtime": 1180.5777,
|
13 |
"eval_samples": 2000,
|
14 |
+
"eval_samples_per_second": 1.694,
|
15 |
+
"eval_steps_per_second": 0.847,
|
16 |
+
"train_loss": 0.0026284047499827543,
|
17 |
+
"train_runtime": 361.3652,
|
18 |
"train_samples": 61135,
|
19 |
+
"train_samples_per_second": 169.178,
|
20 |
+
"train_steps_per_second": 21.145
|
21 |
}
|
eval_results.json
CHANGED
@@ -9,8 +9,8 @@
|
|
9 |
"eval_rewards/chosen": -2.0144448280334473,
|
10 |
"eval_rewards/margins": 1.0093281269073486,
|
11 |
"eval_rewards/rejected": -3.023772954940796,
|
12 |
-
"eval_runtime":
|
13 |
"eval_samples": 2000,
|
14 |
-
"eval_samples_per_second": 1.
|
15 |
-
"eval_steps_per_second": 0.
|
16 |
}
|
|
|
9 |
"eval_rewards/chosen": -2.0144448280334473,
|
10 |
"eval_rewards/margins": 1.0093281269073486,
|
11 |
"eval_rewards/rejected": -3.023772954940796,
|
12 |
+
"eval_runtime": 1180.5777,
|
13 |
"eval_samples": 2000,
|
14 |
+
"eval_samples_per_second": 1.694,
|
15 |
+
"eval_steps_per_second": 0.847
|
16 |
}
|
runs/Jan25_08-45-35_897544936d51/events.out.tfevents.1706172432.897544936d51.14729.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43263219c6ef905a5e9b2e8d04b88fba5e7714fcfbff1319d2891e798e646198
|
3 |
+
size 7439
|
runs/Jan25_08-45-35_897544936d51/events.out.tfevents.1706173974.897544936d51.14729.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f839ce7963a40ec2bf5bf45a1cbb8c8a6ef7e3adce2724393423567597b83d7
|
3 |
+
size 828
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 61135,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second":
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.0026284047499827543,
|
4 |
+
"train_runtime": 361.3652,
|
5 |
"train_samples": 61135,
|
6 |
+
"train_samples_per_second": 169.178,
|
7 |
+
"train_steps_per_second": 21.145
|
8 |
}
|
trainer_state.json
CHANGED
@@ -11938,10 +11938,10 @@
|
|
11938 |
"epoch": 1.0,
|
11939 |
"step": 7641,
|
11940 |
"total_flos": 0.0,
|
11941 |
-
"train_loss": 0.
|
11942 |
-
"train_runtime":
|
11943 |
-
"train_samples_per_second":
|
11944 |
-
"train_steps_per_second":
|
11945 |
}
|
11946 |
],
|
11947 |
"logging_steps": 10,
|
|
|
11938 |
"epoch": 1.0,
|
11939 |
"step": 7641,
|
11940 |
"total_flos": 0.0,
|
11941 |
+
"train_loss": 0.0026284047499827543,
|
11942 |
+
"train_runtime": 361.3652,
|
11943 |
+
"train_samples_per_second": 169.178,
|
11944 |
+
"train_steps_per_second": 21.145
|
11945 |
}
|
11946 |
],
|
11947 |
"logging_steps": 10,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3066614533d569ebba8e0193f5911e3dd110cfda1dc478e054931f7a0e36d9c
|
3 |
size 4856
|