Model save
Browse files- README.md +14 -20
- all_results.json +16 -16
- config.json +1 -1
- eval_results.json +12 -12
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- runs/Apr29_18-08-25_gcp002/events.out.tfevents.1714414192.gcp002.13104.0 +3 -0
- train_results.json +4 -4
- trainer_state.json +121 -121
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,16 +2,10 @@
|
|
2 |
license: other
|
3 |
base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
-
- trl
|
7 |
-
- dpo
|
8 |
-
- generated_from_trainer
|
9 |
- trl
|
10 |
- dpo
|
11 |
- alignment-handbook
|
12 |
- generated_from_trainer
|
13 |
-
datasets:
|
14 |
-
- argilla/dpo-mix-7k
|
15 |
model-index:
|
16 |
- name: zephyr-7b-gemma-dpo
|
17 |
results: []
|
@@ -22,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
|
|
22 |
|
23 |
# zephyr-7b-gemma-dpo
|
24 |
|
25 |
-
This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on
|
26 |
It achieves the following results on the evaluation set:
|
27 |
-
- Loss: 0.
|
28 |
-
- Rewards/chosen: -
|
29 |
-
- Rewards/rejected: -5.
|
30 |
-
- Rewards/accuracies: 0.
|
31 |
-
- Rewards/margins: 1.
|
32 |
-
- Logps/rejected: -
|
33 |
-
- Logps/chosen: -
|
34 |
-
- Logits/rejected:
|
35 |
-
- Logits/chosen:
|
36 |
|
37 |
## Model description
|
38 |
|
@@ -56,10 +50,10 @@ The following hyperparameters were used during training:
|
|
56 |
- eval_batch_size: 4
|
57 |
- seed: 42
|
58 |
- distributed_type: multi-GPU
|
59 |
-
- num_devices:
|
60 |
-
- gradient_accumulation_steps:
|
61 |
- total_train_batch_size: 128
|
62 |
-
- total_eval_batch_size:
|
63 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
64 |
- lr_scheduler_type: cosine
|
65 |
- lr_scheduler_warmup_ratio: 0.1
|
@@ -69,7 +63,7 @@ The following hyperparameters were used during training:
|
|
69 |
|
70 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
71 |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
72 |
-
| 0.
|
73 |
|
74 |
|
75 |
### Framework versions
|
|
|
2 |
license: other
|
3 |
base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
|
4 |
tags:
|
|
|
|
|
|
|
|
|
5 |
- trl
|
6 |
- dpo
|
7 |
- alignment-handbook
|
8 |
- generated_from_trainer
|
|
|
|
|
9 |
model-index:
|
10 |
- name: zephyr-7b-gemma-dpo
|
11 |
results: []
|
|
|
16 |
|
17 |
# zephyr-7b-gemma-dpo
|
18 |
|
19 |
+
This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on an unknown dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.4643
|
22 |
+
- Rewards/chosen: -3.5909
|
23 |
+
- Rewards/rejected: -5.3391
|
24 |
+
- Rewards/accuracies: 0.75
|
25 |
+
- Rewards/margins: 1.7481
|
26 |
+
- Logps/rejected: -515.7638
|
27 |
+
- Logps/chosen: -428.1683
|
28 |
+
- Logits/rejected: 94.0722
|
29 |
+
- Logits/chosen: 91.3541
|
30 |
|
31 |
## Model description
|
32 |
|
|
|
50 |
- eval_batch_size: 4
|
51 |
- seed: 42
|
52 |
- distributed_type: multi-GPU
|
53 |
+
- num_devices: 4
|
54 |
+
- gradient_accumulation_steps: 16
|
55 |
- total_train_batch_size: 128
|
56 |
+
- total_eval_batch_size: 16
|
57 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
58 |
- lr_scheduler_type: cosine
|
59 |
- lr_scheduler_warmup_ratio: 0.1
|
|
|
63 |
|
64 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
65 |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
66 |
+
| 0.1578 | 1.8957 | 100 | 0.4643 | -3.5909 | -5.3391 | 0.75 | 1.7481 | -515.7638 | -428.1683 | 94.0722 | 91.3541 |
|
67 |
|
68 |
|
69 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
{
|
2 |
"epoch": 1.971563981042654,
|
3 |
-
"eval_logits/chosen": 96.
|
4 |
-
"eval_logits/rejected":
|
5 |
-
"eval_logps/chosen": -
|
6 |
-
"eval_logps/rejected": -
|
7 |
-
"eval_loss": 0.
|
8 |
-
"eval_rewards/accuracies": 0.
|
9 |
-
"eval_rewards/chosen": -
|
10 |
-
"eval_rewards/margins": 1.
|
11 |
-
"eval_rewards/rejected": -
|
12 |
-
"eval_runtime":
|
13 |
"eval_samples": 750,
|
14 |
-
"eval_samples_per_second":
|
15 |
-
"eval_steps_per_second": 0.
|
16 |
"total_flos": 0.0,
|
17 |
-
"train_loss": 0.
|
18 |
-
"train_runtime":
|
19 |
"train_samples": 6750,
|
20 |
-
"train_samples_per_second":
|
21 |
-
"train_steps_per_second": 0.
|
22 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.971563981042654,
|
3 |
+
"eval_logits/chosen": 96.71578216552734,
|
4 |
+
"eval_logits/rejected": 90.98221588134766,
|
5 |
+
"eval_logps/chosen": -423.6227722167969,
|
6 |
+
"eval_logps/rejected": -453.7782287597656,
|
7 |
+
"eval_loss": 0.468290776014328,
|
8 |
+
"eval_rewards/accuracies": 0.7708333134651184,
|
9 |
+
"eval_rewards/chosen": -3.0221338272094727,
|
10 |
+
"eval_rewards/margins": 1.6591955423355103,
|
11 |
+
"eval_rewards/rejected": -4.681329727172852,
|
12 |
+
"eval_runtime": 58.6185,
|
13 |
"eval_samples": 750,
|
14 |
+
"eval_samples_per_second": 12.795,
|
15 |
+
"eval_steps_per_second": 0.409,
|
16 |
"total_flos": 0.0,
|
17 |
+
"train_loss": 0.3883641087091886,
|
18 |
+
"train_runtime": 2802.2739,
|
19 |
"train_samples": 6750,
|
20 |
+
"train_samples_per_second": 4.818,
|
21 |
+
"train_steps_per_second": 0.037
|
22 |
}
|
config.json
CHANGED
@@ -24,6 +24,6 @@
|
|
24 |
"rope_theta": 10000.0,
|
25 |
"torch_dtype": "bfloat16",
|
26 |
"transformers_version": "4.40.1",
|
27 |
-
"use_cache":
|
28 |
"vocab_size": 256000
|
29 |
}
|
|
|
24 |
"rope_theta": 10000.0,
|
25 |
"torch_dtype": "bfloat16",
|
26 |
"transformers_version": "4.40.1",
|
27 |
+
"use_cache": false,
|
28 |
"vocab_size": 256000
|
29 |
}
|
eval_results.json
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
{
|
2 |
"epoch": 1.971563981042654,
|
3 |
-
"eval_logits/chosen": 96.
|
4 |
-
"eval_logits/rejected":
|
5 |
-
"eval_logps/chosen": -
|
6 |
-
"eval_logps/rejected": -
|
7 |
-
"eval_loss": 0.
|
8 |
-
"eval_rewards/accuracies": 0.
|
9 |
-
"eval_rewards/chosen": -
|
10 |
-
"eval_rewards/margins": 1.
|
11 |
-
"eval_rewards/rejected": -
|
12 |
-
"eval_runtime":
|
13 |
"eval_samples": 750,
|
14 |
-
"eval_samples_per_second":
|
15 |
-
"eval_steps_per_second": 0.
|
16 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.971563981042654,
|
3 |
+
"eval_logits/chosen": 96.71578216552734,
|
4 |
+
"eval_logits/rejected": 90.98221588134766,
|
5 |
+
"eval_logps/chosen": -423.6227722167969,
|
6 |
+
"eval_logps/rejected": -453.7782287597656,
|
7 |
+
"eval_loss": 0.468290776014328,
|
8 |
+
"eval_rewards/accuracies": 0.7708333134651184,
|
9 |
+
"eval_rewards/chosen": -3.0221338272094727,
|
10 |
+
"eval_rewards/margins": 1.6591955423355103,
|
11 |
+
"eval_rewards/rejected": -4.681329727172852,
|
12 |
+
"eval_runtime": 58.6185,
|
13 |
"eval_samples": 750,
|
14 |
+
"eval_samples_per_second": 12.795,
|
15 |
+
"eval_steps_per_second": 0.409
|
16 |
}
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4995496656
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:861ddf07decf97620b031a6e15e48a651e1034c23bd959c3ff531de2cb3fc3ef
|
3 |
size 4995496656
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4982953168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08603a5d632cb46ee7b055c6a36a5a07d2166b085310ae9aca36b53732222289
|
3 |
size 4982953168
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4982953200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bf49dcc8070ec107b57ff3c2256c6c28740b70300f7e1cb5c38b020fdd478da
|
3 |
size 4982953200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2113988336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3202396afc49b66318136fd06f7a9728e8cb9b624a3d5aaa731944d527748aa
|
3 |
size 2113988336
|
runs/Apr29_18-08-25_gcp002/events.out.tfevents.1714414192.gcp002.13104.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7df9aecf4657b82643e8f0eca97479f3cc310ea6d9af0d5dc3a4377609afee8
|
3 |
+
size 13441
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 1.971563981042654,
|
3 |
"total_flos": 0.0,
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 6750,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.971563981042654,
|
3 |
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.3883641087091886,
|
5 |
+
"train_runtime": 2802.2739,
|
6 |
"train_samples": 6750,
|
7 |
+
"train_samples_per_second": 4.818,
|
8 |
+
"train_steps_per_second": 0.037
|
9 |
}
|
trainer_state.json
CHANGED
@@ -10,12 +10,12 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.018957345971563982,
|
13 |
-
"grad_norm":
|
14 |
"learning_rate": 4.545454545454545e-08,
|
15 |
-
"logits/chosen":
|
16 |
-
"logits/rejected":
|
17 |
-
"logps/chosen": -
|
18 |
-
"logps/rejected": -
|
19 |
"loss": 0.6931,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
@@ -25,178 +25,178 @@
|
|
25 |
},
|
26 |
{
|
27 |
"epoch": 0.1895734597156398,
|
28 |
-
"grad_norm":
|
29 |
"learning_rate": 4.545454545454545e-07,
|
30 |
-
"logits/chosen":
|
31 |
-
"logits/rejected":
|
32 |
-
"logps/chosen": -
|
33 |
-
"logps/rejected": -
|
34 |
-
"loss": 0.
|
35 |
-
"rewards/accuracies": 0.
|
36 |
-
"rewards/chosen":
|
37 |
-
"rewards/margins":
|
38 |
-
"rewards/rejected": 0.
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
"epoch": 0.3791469194312796,
|
43 |
-
"grad_norm":
|
44 |
"learning_rate": 4.885348141000122e-07,
|
45 |
-
"logits/chosen":
|
46 |
-
"logits/rejected":
|
47 |
-
"logps/chosen": -
|
48 |
-
"logps/rejected": -
|
49 |
-
"loss": 0.
|
50 |
-
"rewards/accuracies": 0.
|
51 |
-
"rewards/chosen": 0.
|
52 |
-
"rewards/margins": 0.
|
53 |
-
"rewards/rejected": -0.
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
"epoch": 0.5687203791469194,
|
58 |
-
"grad_norm":
|
59 |
"learning_rate": 4.5025027361734613e-07,
|
60 |
-
"logits/chosen":
|
61 |
-
"logits/rejected":
|
62 |
-
"logps/chosen": -
|
63 |
-
"logps/rejected": -
|
64 |
-
"loss": 0.
|
65 |
-
"rewards/accuracies": 0.
|
66 |
-
"rewards/chosen": -1.
|
67 |
-
"rewards/margins": 0.
|
68 |
-
"rewards/rejected": -2.
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
"epoch": 0.7582938388625592,
|
73 |
-
"grad_norm":
|
74 |
"learning_rate": 3.893311157806091e-07,
|
75 |
-
"logits/chosen":
|
76 |
-
"logits/rejected":
|
77 |
-
"logps/chosen": -
|
78 |
-
"logps/rejected": -
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies": 0.
|
81 |
-
"rewards/chosen": -2.
|
82 |
-
"rewards/margins": 1.
|
83 |
-
"rewards/rejected": -3.
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
"epoch": 0.9478672985781991,
|
88 |
-
"grad_norm":
|
89 |
"learning_rate": 3.126631330646801e-07,
|
90 |
-
"logits/chosen":
|
91 |
-
"logits/rejected":
|
92 |
-
"logps/chosen": -
|
93 |
-
"logps/rejected": -
|
94 |
-
"loss": 0.
|
95 |
-
"rewards/accuracies": 0.
|
96 |
-
"rewards/chosen": -
|
97 |
-
"rewards/margins": 1.
|
98 |
-
"rewards/rejected": -3.
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
"epoch": 1.1374407582938388,
|
103 |
-
"grad_norm":
|
104 |
"learning_rate": 2.2891223348923882e-07,
|
105 |
-
"logits/chosen":
|
106 |
-
"logits/rejected":
|
107 |
-
"logps/chosen": -
|
108 |
-
"logps/rejected": -
|
109 |
-
"loss": 0.
|
110 |
-
"rewards/accuracies": 0.
|
111 |
-
"rewards/chosen": -2.
|
112 |
-
"rewards/margins": 2.
|
113 |
-
"rewards/rejected": -4.
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
"epoch": 1.3270142180094786,
|
118 |
-
"grad_norm":
|
119 |
"learning_rate": 1.4754491880085317e-07,
|
120 |
-
"logits/chosen":
|
121 |
-
"logits/rejected":
|
122 |
-
"logps/chosen": -
|
123 |
-
"logps/rejected": -
|
124 |
-
"loss": 0.
|
125 |
-
"rewards/accuracies": 0.
|
126 |
-
"rewards/chosen": -2.
|
127 |
-
"rewards/margins": 2.
|
128 |
-
"rewards/rejected": -5.
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
"epoch": 1.5165876777251186,
|
133 |
-
"grad_norm":
|
134 |
"learning_rate": 7.775827023107834e-08,
|
135 |
-
"logits/chosen":
|
136 |
-
"logits/rejected":
|
137 |
-
"logps/chosen": -
|
138 |
-
"logps/rejected": -
|
139 |
-
"loss": 0.
|
140 |
-
"rewards/accuracies": 0.
|
141 |
-
"rewards/chosen": -
|
142 |
-
"rewards/margins":
|
143 |
-
"rewards/rejected": -6.
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
"epoch": 1.7061611374407581,
|
148 |
-
"grad_norm":
|
149 |
"learning_rate": 2.7440387297912122e-08,
|
150 |
-
"logits/chosen":
|
151 |
-
"logits/rejected": 123.
|
152 |
-
"logps/chosen": -
|
153 |
-
"logps/rejected": -
|
154 |
-
"loss": 0.
|
155 |
-
"rewards/accuracies": 0.
|
156 |
-
"rewards/chosen": -
|
157 |
-
"rewards/margins": 3.
|
158 |
-
"rewards/rejected": -6.
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
"epoch": 1.8957345971563981,
|
163 |
-
"grad_norm":
|
164 |
"learning_rate": 2.27878296044029e-09,
|
165 |
-
"logits/chosen":
|
166 |
-
"logits/rejected":
|
167 |
-
"logps/chosen": -
|
168 |
-
"logps/rejected": -
|
169 |
-
"loss": 0.
|
170 |
-
"rewards/accuracies": 0.
|
171 |
-
"rewards/chosen": -
|
172 |
-
"rewards/margins":
|
173 |
-
"rewards/rejected": -
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
"epoch": 1.8957345971563981,
|
178 |
-
"eval_logits/chosen":
|
179 |
-
"eval_logits/rejected":
|
180 |
-
"eval_logps/chosen": -
|
181 |
-
"eval_logps/rejected": -
|
182 |
-
"eval_loss": 0.
|
183 |
-
"eval_rewards/accuracies": 0.
|
184 |
-
"eval_rewards/chosen": -
|
185 |
-
"eval_rewards/margins": 1.
|
186 |
-
"eval_rewards/rejected": -5.
|
187 |
-
"eval_runtime":
|
188 |
-
"eval_samples_per_second":
|
189 |
-
"eval_steps_per_second": 0.
|
190 |
"step": 100
|
191 |
},
|
192 |
{
|
193 |
"epoch": 1.971563981042654,
|
194 |
"step": 104,
|
195 |
"total_flos": 0.0,
|
196 |
-
"train_loss": 0.
|
197 |
-
"train_runtime":
|
198 |
-
"train_samples_per_second":
|
199 |
-
"train_steps_per_second": 0.
|
200 |
}
|
201 |
],
|
202 |
"logging_steps": 10,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.018957345971563982,
|
13 |
+
"grad_norm": 133.64062565621384,
|
14 |
"learning_rate": 4.545454545454545e-08,
|
15 |
+
"logits/chosen": 119.0696792602539,
|
16 |
+
"logits/rejected": 120.28123474121094,
|
17 |
+
"logps/chosen": -394.1268310546875,
|
18 |
+
"logps/rejected": -419.3145446777344,
|
19 |
"loss": 0.6931,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
|
|
25 |
},
|
26 |
{
|
27 |
"epoch": 0.1895734597156398,
|
28 |
+
"grad_norm": 130.60842697521545,
|
29 |
"learning_rate": 4.545454545454545e-07,
|
30 |
+
"logits/chosen": 133.6595001220703,
|
31 |
+
"logits/rejected": 136.7303466796875,
|
32 |
+
"logps/chosen": -410.0771484375,
|
33 |
+
"logps/rejected": -445.1907653808594,
|
34 |
+
"loss": 0.7019,
|
35 |
+
"rewards/accuracies": 0.46875,
|
36 |
+
"rewards/chosen": 0.020121444016695023,
|
37 |
+
"rewards/margins": 0.041466910392045975,
|
38 |
+
"rewards/rejected": -0.021345460787415504,
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
"epoch": 0.3791469194312796,
|
43 |
+
"grad_norm": 127.29787487076526,
|
44 |
"learning_rate": 4.885348141000122e-07,
|
45 |
+
"logits/chosen": 122.2022476196289,
|
46 |
+
"logits/rejected": 128.57586669921875,
|
47 |
+
"logps/chosen": -357.1582336425781,
|
48 |
+
"logps/rejected": -416.08087158203125,
|
49 |
+
"loss": 0.6346,
|
50 |
+
"rewards/accuracies": 0.653124988079071,
|
51 |
+
"rewards/chosen": 0.25420495867729187,
|
52 |
+
"rewards/margins": 0.4108888506889343,
|
53 |
+
"rewards/rejected": -0.15668384730815887,
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
"epoch": 0.5687203791469194,
|
58 |
+
"grad_norm": 110.05011163607695,
|
59 |
"learning_rate": 4.5025027361734613e-07,
|
60 |
+
"logits/chosen": 121.9586181640625,
|
61 |
+
"logits/rejected": 125.2878646850586,
|
62 |
+
"logps/chosen": -387.713134765625,
|
63 |
+
"logps/rejected": -442.55206298828125,
|
64 |
+
"loss": 0.5698,
|
65 |
+
"rewards/accuracies": 0.703125,
|
66 |
+
"rewards/chosen": -1.2848999500274658,
|
67 |
+
"rewards/margins": 0.9555079340934753,
|
68 |
+
"rewards/rejected": -2.240407943725586,
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
"epoch": 0.7582938388625592,
|
73 |
+
"grad_norm": 111.08969508053838,
|
74 |
"learning_rate": 3.893311157806091e-07,
|
75 |
+
"logits/chosen": 121.52265930175781,
|
76 |
+
"logits/rejected": 119.2688980102539,
|
77 |
+
"logps/chosen": -402.15716552734375,
|
78 |
+
"logps/rejected": -444.649169921875,
|
79 |
+
"loss": 0.5496,
|
80 |
+
"rewards/accuracies": 0.7593749761581421,
|
81 |
+
"rewards/chosen": -2.0494799613952637,
|
82 |
+
"rewards/margins": 1.070623517036438,
|
83 |
+
"rewards/rejected": -3.120103359222412,
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
"epoch": 0.9478672985781991,
|
88 |
+
"grad_norm": 122.82358054602282,
|
89 |
"learning_rate": 3.126631330646801e-07,
|
90 |
+
"logits/chosen": 128.3933868408203,
|
91 |
+
"logits/rejected": 133.44308471679688,
|
92 |
+
"logps/chosen": -431.0421447753906,
|
93 |
+
"logps/rejected": -497.99420166015625,
|
94 |
+
"loss": 0.4959,
|
95 |
+
"rewards/accuracies": 0.746874988079071,
|
96 |
+
"rewards/chosen": -2.2058186531066895,
|
97 |
+
"rewards/margins": 1.2984471321105957,
|
98 |
+
"rewards/rejected": -3.504265546798706,
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
"epoch": 1.1374407582938388,
|
103 |
+
"grad_norm": 65.56687198861316,
|
104 |
"learning_rate": 2.2891223348923882e-07,
|
105 |
+
"logits/chosen": 124.70857238769531,
|
106 |
+
"logits/rejected": 126.91219329833984,
|
107 |
+
"logps/chosen": -420.981201171875,
|
108 |
+
"logps/rejected": -505.5345153808594,
|
109 |
+
"loss": 0.307,
|
110 |
+
"rewards/accuracies": 0.893750011920929,
|
111 |
+
"rewards/chosen": -2.4674336910247803,
|
112 |
+
"rewards/margins": 2.305318832397461,
|
113 |
+
"rewards/rejected": -4.772752285003662,
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
"epoch": 1.3270142180094786,
|
118 |
+
"grad_norm": 56.106028687537446,
|
119 |
"learning_rate": 1.4754491880085317e-07,
|
120 |
+
"logits/chosen": 121.775146484375,
|
121 |
+
"logits/rejected": 125.95316314697266,
|
122 |
+
"logps/chosen": -425.7054138183594,
|
123 |
+
"logps/rejected": -518.8656005859375,
|
124 |
+
"loss": 0.1907,
|
125 |
+
"rewards/accuracies": 0.940625011920929,
|
126 |
+
"rewards/chosen": -2.623661518096924,
|
127 |
+
"rewards/margins": 2.869920253753662,
|
128 |
+
"rewards/rejected": -5.493582248687744,
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
"epoch": 1.5165876777251186,
|
133 |
+
"grad_norm": 50.43661058282089,
|
134 |
"learning_rate": 7.775827023107834e-08,
|
135 |
+
"logits/chosen": 114.5962142944336,
|
136 |
+
"logits/rejected": 126.1790771484375,
|
137 |
+
"logps/chosen": -426.8082580566406,
|
138 |
+
"logps/rejected": -527.3065185546875,
|
139 |
+
"loss": 0.1761,
|
140 |
+
"rewards/accuracies": 0.9312499761581421,
|
141 |
+
"rewards/chosen": -2.979158401489258,
|
142 |
+
"rewards/margins": 3.0644469261169434,
|
143 |
+
"rewards/rejected": -6.043605804443359,
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
"epoch": 1.7061611374407581,
|
148 |
+
"grad_norm": 45.81843583580765,
|
149 |
"learning_rate": 2.7440387297912122e-08,
|
150 |
+
"logits/chosen": 117.46388244628906,
|
151 |
+
"logits/rejected": 123.80489349365234,
|
152 |
+
"logps/chosen": -449.65399169921875,
|
153 |
+
"logps/rejected": -544.6094970703125,
|
154 |
+
"loss": 0.1515,
|
155 |
+
"rewards/accuracies": 0.984375,
|
156 |
+
"rewards/chosen": -2.8478360176086426,
|
157 |
+
"rewards/margins": 3.247156858444214,
|
158 |
+
"rewards/rejected": -6.0949931144714355,
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
"epoch": 1.8957345971563981,
|
163 |
+
"grad_norm": 42.75820426735574,
|
164 |
"learning_rate": 2.27878296044029e-09,
|
165 |
+
"logits/chosen": 114.7729721069336,
|
166 |
+
"logits/rejected": 119.34477233886719,
|
167 |
+
"logps/chosen": -437.2296447753906,
|
168 |
+
"logps/rejected": -523.9191284179688,
|
169 |
+
"loss": 0.1578,
|
170 |
+
"rewards/accuracies": 0.9593750238418579,
|
171 |
+
"rewards/chosen": -2.8138155937194824,
|
172 |
+
"rewards/margins": 3.170293092727661,
|
173 |
+
"rewards/rejected": -5.984108924865723,
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
"epoch": 1.8957345971563981,
|
178 |
+
"eval_logits/chosen": 91.35408782958984,
|
179 |
+
"eval_logits/rejected": 94.07221221923828,
|
180 |
+
"eval_logps/chosen": -428.1683349609375,
|
181 |
+
"eval_logps/rejected": -515.7637939453125,
|
182 |
+
"eval_loss": 0.4643263816833496,
|
183 |
+
"eval_rewards/accuracies": 0.75,
|
184 |
+
"eval_rewards/chosen": -3.5909416675567627,
|
185 |
+
"eval_rewards/margins": 1.7481167316436768,
|
186 |
+
"eval_rewards/rejected": -5.339057922363281,
|
187 |
+
"eval_runtime": 88.3612,
|
188 |
+
"eval_samples_per_second": 8.488,
|
189 |
+
"eval_steps_per_second": 0.532,
|
190 |
"step": 100
|
191 |
},
|
192 |
{
|
193 |
"epoch": 1.971563981042654,
|
194 |
"step": 104,
|
195 |
"total_flos": 0.0,
|
196 |
+
"train_loss": 0.3883641087091886,
|
197 |
+
"train_runtime": 2802.2739,
|
198 |
+
"train_samples_per_second": 4.818,
|
199 |
+
"train_steps_per_second": 0.037
|
200 |
}
|
201 |
],
|
202 |
"logging_steps": 10,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b835231394e8e7d484d57fdd04805c7ac65d3f2e0c869e656ccf783b2d023691
|
3 |
size 6264
|