chrlu committed on
Commit
f5a8c5e
·
verified ·
1 Parent(s): cea241f

Model save

Browse files
README.md CHANGED
@@ -2,16 +2,10 @@
2
  license: other
3
  base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
4
  tags:
5
- - alignment-handbook
6
- - trl
7
- - dpo
8
- - generated_from_trainer
9
  - trl
10
  - dpo
11
  - alignment-handbook
12
  - generated_from_trainer
13
- datasets:
14
- - argilla/dpo-mix-7k
15
  model-index:
16
  - name: zephyr-7b-gemma-dpo
17
  results: []
@@ -22,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # zephyr-7b-gemma-dpo
24
 
25
- This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on the argilla/dpo-mix-7k dataset.
26
  It achieves the following results on the evaluation set:
27
- - Loss: 0.4653
28
- - Rewards/chosen: -4.1128
29
- - Rewards/rejected: -5.6919
30
- - Rewards/accuracies: 0.7292
31
- - Rewards/margins: 1.5791
32
- - Logps/rejected: -475.9087
33
- - Logps/chosen: -445.9241
34
- - Logits/rejected: 91.1136
35
- - Logits/chosen: 96.7260
36
 
37
  ## Model description
38
 
@@ -56,10 +50,10 @@ The following hyperparameters were used during training:
56
  - eval_batch_size: 4
57
  - seed: 42
58
  - distributed_type: multi-GPU
59
- - num_devices: 8
60
- - gradient_accumulation_steps: 8
61
  - total_train_batch_size: 128
62
- - total_eval_batch_size: 32
63
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
64
  - lr_scheduler_type: cosine
65
  - lr_scheduler_warmup_ratio: 0.1
@@ -69,7 +63,7 @@ The following hyperparameters were used during training:
69
 
70
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
71
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
72
- | 0.1585 | 1.8957 | 100 | 0.4673 | -4.1293 | -5.7148 | 0.6979 | 1.5855 | -476.3664 | -446.2537 | 91.1323 | 96.7315 |
73
 
74
 
75
  ### Framework versions
 
2
  license: other
3
  base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
4
  tags:
 
 
 
 
5
  - trl
6
  - dpo
7
  - alignment-handbook
8
  - generated_from_trainer
 
 
9
  model-index:
10
  - name: zephyr-7b-gemma-dpo
11
  results: []
 
16
 
17
  # zephyr-7b-gemma-dpo
18
 
19
+ This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.4643
22
+ - Rewards/chosen: -3.5909
23
+ - Rewards/rejected: -5.3391
24
+ - Rewards/accuracies: 0.75
25
+ - Rewards/margins: 1.7481
26
+ - Logps/rejected: -515.7638
27
+ - Logps/chosen: -428.1683
28
+ - Logits/rejected: 94.0722
29
+ - Logits/chosen: 91.3541
30
 
31
  ## Model description
32
 
 
50
  - eval_batch_size: 4
51
  - seed: 42
52
  - distributed_type: multi-GPU
53
+ - num_devices: 4
54
+ - gradient_accumulation_steps: 16
55
  - total_train_batch_size: 128
56
+ - total_eval_batch_size: 16
57
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
  - lr_scheduler_type: cosine
59
  - lr_scheduler_warmup_ratio: 0.1
 
63
 
64
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
65
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
66
+ | 0.1578 | 1.8957 | 100 | 0.4643 | -3.5909 | -5.3391 | 0.75 | 1.7481 | -515.7638 | -428.1683 | 94.0722 | 91.3541 |
67
 
68
 
69
  ### Framework versions
all_results.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
  "epoch": 1.971563981042654,
3
- "eval_logits/chosen": 96.72599029541016,
4
- "eval_logits/rejected": 91.11358642578125,
5
- "eval_logps/chosen": -445.924072265625,
6
- "eval_logps/rejected": -475.90869140625,
7
- "eval_loss": 0.4652560353279114,
8
- "eval_rewards/accuracies": 0.7291666865348816,
9
- "eval_rewards/chosen": -4.112792491912842,
10
- "eval_rewards/margins": 1.5790935754776,
11
- "eval_rewards/rejected": -5.691885471343994,
12
- "eval_runtime": 119.6024,
13
  "eval_samples": 750,
14
- "eval_samples_per_second": 6.271,
15
- "eval_steps_per_second": 0.201,
16
  "total_flos": 0.0,
17
- "train_loss": 0.39153398688022906,
18
- "train_runtime": 2311.0387,
19
  "train_samples": 6750,
20
- "train_samples_per_second": 5.842,
21
- "train_steps_per_second": 0.045
22
  }
 
1
  {
2
  "epoch": 1.971563981042654,
3
+ "eval_logits/chosen": 96.71578216552734,
4
+ "eval_logits/rejected": 90.98221588134766,
5
+ "eval_logps/chosen": -423.6227722167969,
6
+ "eval_logps/rejected": -453.7782287597656,
7
+ "eval_loss": 0.468290776014328,
8
+ "eval_rewards/accuracies": 0.7708333134651184,
9
+ "eval_rewards/chosen": -3.0221338272094727,
10
+ "eval_rewards/margins": 1.6591955423355103,
11
+ "eval_rewards/rejected": -4.681329727172852,
12
+ "eval_runtime": 58.6185,
13
  "eval_samples": 750,
14
+ "eval_samples_per_second": 12.795,
15
+ "eval_steps_per_second": 0.409,
16
  "total_flos": 0.0,
17
+ "train_loss": 0.3883641087091886,
18
+ "train_runtime": 2802.2739,
19
  "train_samples": 6750,
20
+ "train_samples_per_second": 4.818,
21
+ "train_steps_per_second": 0.037
22
  }
config.json CHANGED
@@ -24,6 +24,6 @@
24
  "rope_theta": 10000.0,
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.40.1",
27
- "use_cache": true,
28
  "vocab_size": 256000
29
  }
 
24
  "rope_theta": 10000.0,
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.40.1",
27
+ "use_cache": false,
28
  "vocab_size": 256000
29
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 1.971563981042654,
3
- "eval_logits/chosen": 96.72599029541016,
4
- "eval_logits/rejected": 91.11358642578125,
5
- "eval_logps/chosen": -445.924072265625,
6
- "eval_logps/rejected": -475.90869140625,
7
- "eval_loss": 0.4652560353279114,
8
- "eval_rewards/accuracies": 0.7291666865348816,
9
- "eval_rewards/chosen": -4.112792491912842,
10
- "eval_rewards/margins": 1.5790935754776,
11
- "eval_rewards/rejected": -5.691885471343994,
12
- "eval_runtime": 119.6024,
13
  "eval_samples": 750,
14
- "eval_samples_per_second": 6.271,
15
- "eval_steps_per_second": 0.201
16
  }
 
1
  {
2
  "epoch": 1.971563981042654,
3
+ "eval_logits/chosen": 96.71578216552734,
4
+ "eval_logits/rejected": 90.98221588134766,
5
+ "eval_logps/chosen": -423.6227722167969,
6
+ "eval_logps/rejected": -453.7782287597656,
7
+ "eval_loss": 0.468290776014328,
8
+ "eval_rewards/accuracies": 0.7708333134651184,
9
+ "eval_rewards/chosen": -3.0221338272094727,
10
+ "eval_rewards/margins": 1.6591955423355103,
11
+ "eval_rewards/rejected": -4.681329727172852,
12
+ "eval_runtime": 58.6185,
13
  "eval_samples": 750,
14
+ "eval_samples_per_second": 12.795,
15
+ "eval_steps_per_second": 0.409
16
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b26faf7dcff7b7ca7bbf4ebc9d54968ab56cd1bbe5b3f4059d0ec34b7b1ccdd2
3
  size 4995496656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:861ddf07decf97620b031a6e15e48a651e1034c23bd959c3ff531de2cb3fc3ef
3
  size 4995496656
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98a41992ae6f80c80eaa24e7d8dbecab5d07c2802028c109568fe70565b4c6d8
3
  size 4982953168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08603a5d632cb46ee7b055c6a36a5a07d2166b085310ae9aca36b53732222289
3
  size 4982953168
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d1d7ab5de3f2e26234060bf0c99e343d3a84489614f455b267bd22f059fc862
3
  size 4982953200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bf49dcc8070ec107b57ff3c2256c6c28740b70300f7e1cb5c38b020fdd478da
3
  size 4982953200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07f5983e107d05b629942a14afa7af7fe7e3836b05bc872e472789542c0f95b6
3
  size 2113988336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3202396afc49b66318136fd06f7a9728e8cb9b624a3d5aaa731944d527748aa
3
  size 2113988336
runs/Apr29_18-08-25_gcp002/events.out.tfevents.1714414192.gcp002.13104.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7df9aecf4657b82643e8f0eca97479f3cc310ea6d9af0d5dc3a4377609afee8
3
+ size 13441
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.971563981042654,
3
  "total_flos": 0.0,
4
- "train_loss": 0.39153398688022906,
5
- "train_runtime": 2311.0387,
6
  "train_samples": 6750,
7
- "train_samples_per_second": 5.842,
8
- "train_steps_per_second": 0.045
9
  }
 
1
  {
2
  "epoch": 1.971563981042654,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.3883641087091886,
5
+ "train_runtime": 2802.2739,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 4.818,
8
+ "train_steps_per_second": 0.037
9
  }
trainer_state.json CHANGED
@@ -10,12 +10,12 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.018957345971563982,
13
- "grad_norm": 132.15360444004384,
14
  "learning_rate": 4.545454545454545e-08,
15
- "logits/chosen": 117.53560638427734,
16
- "logits/rejected": 126.8960952758789,
17
- "logps/chosen": -335.40118408203125,
18
- "logps/rejected": -439.16552734375,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
@@ -25,178 +25,178 @@
25
  },
26
  {
27
  "epoch": 0.1895734597156398,
28
- "grad_norm": 132.3674027987073,
29
  "learning_rate": 4.545454545454545e-07,
30
- "logits/chosen": 135.01699829101562,
31
- "logits/rejected": 138.37664794921875,
32
- "logps/chosen": -396.05718994140625,
33
- "logps/rejected": -439.1203918457031,
34
- "loss": 0.7127,
35
- "rewards/accuracies": 0.4583333432674408,
36
- "rewards/chosen": -0.0030322629027068615,
37
- "rewards/margins": -0.013390823267400265,
38
- "rewards/rejected": 0.010358559899032116,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.3791469194312796,
43
- "grad_norm": 131.21733523095625,
44
  "learning_rate": 4.885348141000122e-07,
45
- "logits/chosen": 121.60444641113281,
46
- "logits/rejected": 125.29842376708984,
47
- "logps/chosen": -370.2664489746094,
48
- "logps/rejected": -422.78851318359375,
49
- "loss": 0.6459,
50
- "rewards/accuracies": 0.612500011920929,
51
- "rewards/chosen": 0.10727670043706894,
52
- "rewards/margins": 0.247134730219841,
53
- "rewards/rejected": -0.13985800743103027,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.5687203791469194,
58
- "grad_norm": 117.90232463642135,
59
  "learning_rate": 4.5025027361734613e-07,
60
- "logits/chosen": 142.974853515625,
61
- "logits/rejected": 136.52386474609375,
62
- "logps/chosen": -424.7781677246094,
63
- "logps/rejected": -469.64813232421875,
64
- "loss": 0.5746,
65
- "rewards/accuracies": 0.6937500238418579,
66
- "rewards/chosen": -1.6156466007232666,
67
- "rewards/margins": 0.8666501045227051,
68
- "rewards/rejected": -2.4822967052459717,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.7582938388625592,
73
- "grad_norm": 104.91283452119073,
74
  "learning_rate": 3.893311157806091e-07,
75
- "logits/chosen": 126.9936752319336,
76
- "logits/rejected": 115.53365325927734,
77
- "logps/chosen": -399.81353759765625,
78
- "logps/rejected": -426.99853515625,
79
- "loss": 0.5456,
80
- "rewards/accuracies": 0.737500011920929,
81
- "rewards/chosen": -2.2809689044952393,
82
- "rewards/margins": 1.1751956939697266,
83
- "rewards/rejected": -3.456164598464966,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.9478672985781991,
88
- "grad_norm": 123.57780236639618,
89
  "learning_rate": 3.126631330646801e-07,
90
- "logits/chosen": 142.1190643310547,
91
- "logits/rejected": 146.2515411376953,
92
- "logps/chosen": -456.97979736328125,
93
- "logps/rejected": -540.1392822265625,
94
- "loss": 0.489,
95
- "rewards/accuracies": 0.7875000238418579,
96
- "rewards/chosen": -1.891798734664917,
97
- "rewards/margins": 1.2988468408584595,
98
- "rewards/rejected": -3.190645456314087,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 1.1374407582938388,
103
- "grad_norm": 67.1680971334559,
104
  "learning_rate": 2.2891223348923882e-07,
105
- "logits/chosen": 133.56114196777344,
106
- "logits/rejected": 137.20738220214844,
107
- "logps/chosen": -449.55303955078125,
108
- "logps/rejected": -534.8367919921875,
109
- "loss": 0.3117,
110
- "rewards/accuracies": 0.887499988079071,
111
- "rewards/chosen": -2.5773684978485107,
112
- "rewards/margins": 2.346193790435791,
113
- "rewards/rejected": -4.923562049865723,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 1.3270142180094786,
118
- "grad_norm": 48.54475300946312,
119
  "learning_rate": 1.4754491880085317e-07,
120
- "logits/chosen": 125.71492004394531,
121
- "logits/rejected": 127.68719482421875,
122
- "logps/chosen": -426.90228271484375,
123
- "logps/rejected": -528.0679321289062,
124
- "loss": 0.195,
125
- "rewards/accuracies": 0.9437500238418579,
126
- "rewards/chosen": -2.9879212379455566,
127
- "rewards/margins": 2.7397806644439697,
128
- "rewards/rejected": -5.727701663970947,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 1.5165876777251186,
133
- "grad_norm": 55.371866892062,
134
  "learning_rate": 7.775827023107834e-08,
135
- "logits/chosen": 111.7248306274414,
136
- "logits/rejected": 128.3420867919922,
137
- "logps/chosen": -427.53106689453125,
138
- "logps/rejected": -546.7640991210938,
139
- "loss": 0.1651,
140
- "rewards/accuracies": 0.949999988079071,
141
- "rewards/chosen": -3.693999767303467,
142
- "rewards/margins": 2.9823195934295654,
143
- "rewards/rejected": -6.676319122314453,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 1.7061611374407581,
148
- "grad_norm": 40.99464664899818,
149
  "learning_rate": 2.7440387297912122e-08,
150
- "logits/chosen": 110.8941879272461,
151
- "logits/rejected": 123.70848083496094,
152
- "logps/chosen": -457.2183532714844,
153
- "logps/rejected": -575.8634033203125,
154
- "loss": 0.1557,
155
- "rewards/accuracies": 0.9750000238418579,
156
- "rewards/chosen": -3.5393664836883545,
157
- "rewards/margins": 3.369715929031372,
158
- "rewards/rejected": -6.909082889556885,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 1.8957345971563981,
163
- "grad_norm": 45.385328063823785,
164
  "learning_rate": 2.27878296044029e-09,
165
- "logits/chosen": 117.1551284790039,
166
- "logits/rejected": 117.0487060546875,
167
- "logps/chosen": -446.9934997558594,
168
- "logps/rejected": -541.2728881835938,
169
- "loss": 0.1585,
170
- "rewards/accuracies": 0.925000011920929,
171
- "rewards/chosen": -3.250919818878174,
172
- "rewards/margins": 2.9745240211486816,
173
- "rewards/rejected": -6.2254438400268555,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 1.8957345971563981,
178
- "eval_logits/chosen": 96.73149871826172,
179
- "eval_logits/rejected": 91.1323013305664,
180
- "eval_logps/chosen": -446.253662109375,
181
- "eval_logps/rejected": -476.3663635253906,
182
- "eval_loss": 0.46732592582702637,
183
- "eval_rewards/accuracies": 0.6979166865348816,
184
- "eval_rewards/chosen": -4.1292724609375,
185
- "eval_rewards/margins": 1.5854991674423218,
186
- "eval_rewards/rejected": -5.714771270751953,
187
- "eval_runtime": 120.4793,
188
- "eval_samples_per_second": 6.225,
189
- "eval_steps_per_second": 0.199,
190
  "step": 100
191
  },
192
  {
193
  "epoch": 1.971563981042654,
194
  "step": 104,
195
  "total_flos": 0.0,
196
- "train_loss": 0.39153398688022906,
197
- "train_runtime": 2311.0387,
198
- "train_samples_per_second": 5.842,
199
- "train_steps_per_second": 0.045
200
  }
201
  ],
202
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.018957345971563982,
13
+ "grad_norm": 133.64062565621384,
14
  "learning_rate": 4.545454545454545e-08,
15
+ "logits/chosen": 119.0696792602539,
16
+ "logits/rejected": 120.28123474121094,
17
+ "logps/chosen": -394.1268310546875,
18
+ "logps/rejected": -419.3145446777344,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
 
25
  },
26
  {
27
  "epoch": 0.1895734597156398,
28
+ "grad_norm": 130.60842697521545,
29
  "learning_rate": 4.545454545454545e-07,
30
+ "logits/chosen": 133.6595001220703,
31
+ "logits/rejected": 136.7303466796875,
32
+ "logps/chosen": -410.0771484375,
33
+ "logps/rejected": -445.1907653808594,
34
+ "loss": 0.7019,
35
+ "rewards/accuracies": 0.46875,
36
+ "rewards/chosen": 0.020121444016695023,
37
+ "rewards/margins": 0.041466910392045975,
38
+ "rewards/rejected": -0.021345460787415504,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.3791469194312796,
43
+ "grad_norm": 127.29787487076526,
44
  "learning_rate": 4.885348141000122e-07,
45
+ "logits/chosen": 122.2022476196289,
46
+ "logits/rejected": 128.57586669921875,
47
+ "logps/chosen": -357.1582336425781,
48
+ "logps/rejected": -416.08087158203125,
49
+ "loss": 0.6346,
50
+ "rewards/accuracies": 0.653124988079071,
51
+ "rewards/chosen": 0.25420495867729187,
52
+ "rewards/margins": 0.4108888506889343,
53
+ "rewards/rejected": -0.15668384730815887,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.5687203791469194,
58
+ "grad_norm": 110.05011163607695,
59
  "learning_rate": 4.5025027361734613e-07,
60
+ "logits/chosen": 121.9586181640625,
61
+ "logits/rejected": 125.2878646850586,
62
+ "logps/chosen": -387.713134765625,
63
+ "logps/rejected": -442.55206298828125,
64
+ "loss": 0.5698,
65
+ "rewards/accuracies": 0.703125,
66
+ "rewards/chosen": -1.2848999500274658,
67
+ "rewards/margins": 0.9555079340934753,
68
+ "rewards/rejected": -2.240407943725586,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.7582938388625592,
73
+ "grad_norm": 111.08969508053838,
74
  "learning_rate": 3.893311157806091e-07,
75
+ "logits/chosen": 121.52265930175781,
76
+ "logits/rejected": 119.2688980102539,
77
+ "logps/chosen": -402.15716552734375,
78
+ "logps/rejected": -444.649169921875,
79
+ "loss": 0.5496,
80
+ "rewards/accuracies": 0.7593749761581421,
81
+ "rewards/chosen": -2.0494799613952637,
82
+ "rewards/margins": 1.070623517036438,
83
+ "rewards/rejected": -3.120103359222412,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.9478672985781991,
88
+ "grad_norm": 122.82358054602282,
89
  "learning_rate": 3.126631330646801e-07,
90
+ "logits/chosen": 128.3933868408203,
91
+ "logits/rejected": 133.44308471679688,
92
+ "logps/chosen": -431.0421447753906,
93
+ "logps/rejected": -497.99420166015625,
94
+ "loss": 0.4959,
95
+ "rewards/accuracies": 0.746874988079071,
96
+ "rewards/chosen": -2.2058186531066895,
97
+ "rewards/margins": 1.2984471321105957,
98
+ "rewards/rejected": -3.504265546798706,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 1.1374407582938388,
103
+ "grad_norm": 65.56687198861316,
104
  "learning_rate": 2.2891223348923882e-07,
105
+ "logits/chosen": 124.70857238769531,
106
+ "logits/rejected": 126.91219329833984,
107
+ "logps/chosen": -420.981201171875,
108
+ "logps/rejected": -505.5345153808594,
109
+ "loss": 0.307,
110
+ "rewards/accuracies": 0.893750011920929,
111
+ "rewards/chosen": -2.4674336910247803,
112
+ "rewards/margins": 2.305318832397461,
113
+ "rewards/rejected": -4.772752285003662,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 1.3270142180094786,
118
+ "grad_norm": 56.106028687537446,
119
  "learning_rate": 1.4754491880085317e-07,
120
+ "logits/chosen": 121.775146484375,
121
+ "logits/rejected": 125.95316314697266,
122
+ "logps/chosen": -425.7054138183594,
123
+ "logps/rejected": -518.8656005859375,
124
+ "loss": 0.1907,
125
+ "rewards/accuracies": 0.940625011920929,
126
+ "rewards/chosen": -2.623661518096924,
127
+ "rewards/margins": 2.869920253753662,
128
+ "rewards/rejected": -5.493582248687744,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 1.5165876777251186,
133
+ "grad_norm": 50.43661058282089,
134
  "learning_rate": 7.775827023107834e-08,
135
+ "logits/chosen": 114.5962142944336,
136
+ "logits/rejected": 126.1790771484375,
137
+ "logps/chosen": -426.8082580566406,
138
+ "logps/rejected": -527.3065185546875,
139
+ "loss": 0.1761,
140
+ "rewards/accuracies": 0.9312499761581421,
141
+ "rewards/chosen": -2.979158401489258,
142
+ "rewards/margins": 3.0644469261169434,
143
+ "rewards/rejected": -6.043605804443359,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 1.7061611374407581,
148
+ "grad_norm": 45.81843583580765,
149
  "learning_rate": 2.7440387297912122e-08,
150
+ "logits/chosen": 117.46388244628906,
151
+ "logits/rejected": 123.80489349365234,
152
+ "logps/chosen": -449.65399169921875,
153
+ "logps/rejected": -544.6094970703125,
154
+ "loss": 0.1515,
155
+ "rewards/accuracies": 0.984375,
156
+ "rewards/chosen": -2.8478360176086426,
157
+ "rewards/margins": 3.247156858444214,
158
+ "rewards/rejected": -6.0949931144714355,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 1.8957345971563981,
163
+ "grad_norm": 42.75820426735574,
164
  "learning_rate": 2.27878296044029e-09,
165
+ "logits/chosen": 114.7729721069336,
166
+ "logits/rejected": 119.34477233886719,
167
+ "logps/chosen": -437.2296447753906,
168
+ "logps/rejected": -523.9191284179688,
169
+ "loss": 0.1578,
170
+ "rewards/accuracies": 0.9593750238418579,
171
+ "rewards/chosen": -2.8138155937194824,
172
+ "rewards/margins": 3.170293092727661,
173
+ "rewards/rejected": -5.984108924865723,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 1.8957345971563981,
178
+ "eval_logits/chosen": 91.35408782958984,
179
+ "eval_logits/rejected": 94.07221221923828,
180
+ "eval_logps/chosen": -428.1683349609375,
181
+ "eval_logps/rejected": -515.7637939453125,
182
+ "eval_loss": 0.4643263816833496,
183
+ "eval_rewards/accuracies": 0.75,
184
+ "eval_rewards/chosen": -3.5909416675567627,
185
+ "eval_rewards/margins": 1.7481167316436768,
186
+ "eval_rewards/rejected": -5.339057922363281,
187
+ "eval_runtime": 88.3612,
188
+ "eval_samples_per_second": 8.488,
189
+ "eval_steps_per_second": 0.532,
190
  "step": 100
191
  },
192
  {
193
  "epoch": 1.971563981042654,
194
  "step": 104,
195
  "total_flos": 0.0,
196
+ "train_loss": 0.3883641087091886,
197
+ "train_runtime": 2802.2739,
198
+ "train_samples_per_second": 4.818,
199
+ "train_steps_per_second": 0.037
200
  }
201
  ],
202
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79ee78a4306adfc04ffa07fc0ca8acbb9d3417b9d7c9f4adaf815a8d83ea6a24
3
  size 6264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b835231394e8e7d484d57fdd04805c7ac65d3f2e0c869e656ccf783b2d023691
3
  size 6264