Howard881010 commited on
Commit
ec84c3f
1 Parent(s): 8c77e6a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. README.md +91 -0
  3. adapter_config.json +34 -0
  4. adapter_model.safetensors +3 -0
  5. all_results.json +20 -0
  6. checkpoint-1000/README.md +202 -0
  7. checkpoint-1000/adapter_config.json +34 -0
  8. checkpoint-1000/adapter_model.safetensors +3 -0
  9. checkpoint-1000/optimizer.pt +3 -0
  10. checkpoint-1000/rng_state_0.pth +3 -0
  11. checkpoint-1000/rng_state_1.pth +3 -0
  12. checkpoint-1000/scheduler.pt +3 -0
  13. checkpoint-1000/special_tokens_map.json +24 -0
  14. checkpoint-1000/tokenizer.json +3 -0
  15. checkpoint-1000/tokenizer_config.json +0 -0
  16. checkpoint-1000/trainer_state.json +1789 -0
  17. checkpoint-1000/training_args.bin +3 -0
  18. checkpoint-1125/README.md +202 -0
  19. checkpoint-1125/adapter_config.json +34 -0
  20. checkpoint-1125/adapter_model.safetensors +3 -0
  21. checkpoint-1125/optimizer.pt +3 -0
  22. checkpoint-1125/rng_state_0.pth +3 -0
  23. checkpoint-1125/rng_state_1.pth +3 -0
  24. checkpoint-1125/scheduler.pt +3 -0
  25. checkpoint-1125/special_tokens_map.json +24 -0
  26. checkpoint-1125/tokenizer.json +3 -0
  27. checkpoint-1125/tokenizer_config.json +0 -0
  28. checkpoint-1125/trainer_state.json +2001 -0
  29. checkpoint-1125/training_args.bin +3 -0
  30. checkpoint-500/README.md +202 -0
  31. checkpoint-500/adapter_config.json +34 -0
  32. checkpoint-500/adapter_model.safetensors +3 -0
  33. checkpoint-500/optimizer.pt +3 -0
  34. checkpoint-500/rng_state_0.pth +3 -0
  35. checkpoint-500/rng_state_1.pth +3 -0
  36. checkpoint-500/scheduler.pt +3 -0
  37. checkpoint-500/special_tokens_map.json +24 -0
  38. checkpoint-500/tokenizer.json +3 -0
  39. checkpoint-500/tokenizer_config.json +0 -0
  40. checkpoint-500/trainer_state.json +911 -0
  41. checkpoint-500/training_args.bin +3 -0
  42. eval_results.json +15 -0
  43. runs/Dec18_22-22-59_yadi/events.out.tfevents.1734560709.yadi.436386.0 +3 -0
  44. runs/Dec18_22-22-59_yadi/events.out.tfevents.1734574921.yadi.436386.1 +3 -0
  45. special_tokens_map.json +24 -0
  46. tokenizer.json +3 -0
  47. tokenizer_config.json +0 -0
  48. train_results.json +8 -0
  49. trainer_log.jsonl +131 -0
  50. trainer_state.json +2010 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-1125/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ license: other
5
+ tags:
6
+ - llama-factory
7
+ - lora
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: sft_dpo_fs
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # sft_dpo_fs
18
+
19
+ This model is a fine-tuned version of [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407) on the heat_transfer_dpo dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.1535
22
+ - Rewards/chosen: 17.2823
23
+ - Rewards/rejected: 11.3004
24
+ - Rewards/accuracies: 0.9610
25
+ - Rewards/margins: 5.9819
26
+ - Logps/chosen: -2.2063
27
+ - Logps/rejected: -60.6033
28
+ - Logits/chosen: 0.0035
29
+ - Logits/rejected: -0.0076
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 5e-06
49
+ - train_batch_size: 4
50
+ - eval_batch_size: 4
51
+ - seed: 42
52
+ - distributed_type: multi-GPU
53
+ - num_devices: 2
54
+ - total_train_batch_size: 8
55
+ - total_eval_batch_size: 8
56
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
57
+ - lr_scheduler_type: cosine
58
+ - lr_scheduler_warmup_ratio: 0.1
59
+ - num_epochs: 1
60
+
61
+ ### Training results
62
+
63
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/chosen | Logps/rejected | Logits/chosen | Logits/rejected |
64
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:------------:|:--------------:|:-------------:|:---------------:|
65
+ | 0.3835 | 0.0533 | 60 | 0.3287 | 17.1482 | 15.7095 | 0.9280 | 1.4386 | -3.5472 | -16.5118 | -0.5827 | -0.5914 |
66
+ | 0.2552 | 0.1067 | 120 | 0.1900 | 17.1335 | 13.7535 | 0.9320 | 3.3799 | -3.6944 | -36.0722 | -0.2065 | -0.2218 |
67
+ | 0.2362 | 0.16 | 180 | 0.2024 | 17.0614 | 11.9722 | 0.9510 | 5.0892 | -4.4150 | -53.8850 | -0.1087 | -0.1222 |
68
+ | 0.1781 | 0.2133 | 240 | 0.1546 | 17.0620 | 12.2862 | 0.9500 | 4.7758 | -4.4089 | -50.7448 | -0.1243 | -0.1381 |
69
+ | 0.265 | 0.2667 | 300 | 0.1536 | 17.2493 | 12.6444 | 0.9440 | 4.6050 | -2.5355 | -47.1637 | -0.1744 | -0.1856 |
70
+ | 0.1605 | 0.32 | 360 | 0.3194 | 17.3612 | 12.2655 | 0.9210 | 5.0958 | -1.4165 | -50.9525 | -0.1062 | -0.1173 |
71
+ | 0.2894 | 0.3733 | 420 | 0.1679 | 17.3116 | 12.2496 | 0.9450 | 5.0620 | -1.9131 | -51.1113 | -0.0905 | -0.1026 |
72
+ | 0.1149 | 0.4267 | 480 | 0.2951 | 17.0540 | 11.9844 | 0.9230 | 5.0696 | -4.4890 | -53.7628 | -0.0770 | -0.0883 |
73
+ | 0.0384 | 0.48 | 540 | 0.1739 | 17.2042 | 12.1334 | 0.9490 | 5.0708 | -2.9873 | -52.2731 | -0.0512 | -0.0612 |
74
+ | 0.4008 | 0.5333 | 600 | 0.1706 | 17.2853 | 11.6981 | 0.9470 | 5.5872 | -2.1760 | -56.6266 | -0.0358 | -0.0469 |
75
+ | 0.1678 | 0.5867 | 660 | 0.2050 | 17.2021 | 11.5656 | 0.9450 | 5.6365 | -3.0082 | -57.9516 | -0.0160 | -0.0270 |
76
+ | 0.2272 | 0.64 | 720 | 0.1402 | 17.3928 | 11.7696 | 0.9520 | 5.6233 | -1.1005 | -55.9117 | -0.0229 | -0.0322 |
77
+ | 0.1915 | 0.6933 | 780 | 0.2441 | 17.3947 | 11.7656 | 0.9320 | 5.6290 | -1.0823 | -55.9507 | -0.0166 | -0.0266 |
78
+ | 0.0635 | 0.7467 | 840 | 0.1689 | 17.3812 | 11.5343 | 0.9450 | 5.8469 | -1.2169 | -58.2643 | -0.0111 | -0.0217 |
79
+ | 0.1703 | 0.8 | 900 | 0.1400 | 17.3271 | 11.3817 | 0.9610 | 5.9455 | -1.7577 | -59.7906 | 0.0002 | -0.0105 |
80
+ | 0.1138 | 0.8533 | 960 | 0.1441 | 17.3149 | 11.3432 | 0.9630 | 5.9718 | -1.8795 | -60.1756 | 0.0015 | -0.0094 |
81
+ | 0.0513 | 0.9067 | 1020 | 0.1412 | 17.3211 | 11.3263 | 0.9610 | 5.9948 | -1.8178 | -60.3445 | 0.0045 | -0.0065 |
82
+ | 0.1189 | 0.96 | 1080 | 0.1508 | 17.2887 | 11.3001 | 0.9610 | 5.9886 | -2.1420 | -60.6061 | 0.0074 | -0.0036 |
83
+
84
+
85
+ ### Framework versions
86
+
87
+ - PEFT 0.12.0
88
+ - Transformers 4.46.0
89
+ - Pytorch 2.4.0+cu121
90
+ - Datasets 2.21.0
91
+ - Tokenizers 0.20.1
adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "gate_proj",
25
+ "k_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "down_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6404a5e1c60aa26a99a4ccbb402f485483d5eea90c08441f0448e29c51e2b1d0
3
+ size 114106856
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": 0.0034948738757520914,
4
+ "eval_logits/rejected": -0.007565807551145554,
5
+ "eval_logps/chosen": -2.20627498626709,
6
+ "eval_logps/rejected": -60.60332489013672,
7
+ "eval_loss": 0.15350937843322754,
8
+ "eval_rewards/accuracies": 0.9610000848770142,
9
+ "eval_rewards/chosen": 17.28226661682129,
10
+ "eval_rewards/margins": 5.981877326965332,
11
+ "eval_rewards/rejected": 11.30038833618164,
12
+ "eval_runtime": 361.5652,
13
+ "eval_samples_per_second": 2.766,
14
+ "eval_steps_per_second": 0.346,
15
+ "total_flos": 1.4338459346927616e+18,
16
+ "train_loss": 0.23597783709896936,
17
+ "train_runtime": 13850.7044,
18
+ "train_samples_per_second": 0.65,
19
+ "train_steps_per_second": 0.081
20
+ }
checkpoint-1000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "gate_proj",
25
+ "k_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "down_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
checkpoint-1000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b661db279de1245ec057ccc9ea7b2a9d8e593601b17b234d71a8b65eea724cc
3
+ size 114106856
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5df0d088d43a257985e1c6b5e7afebb17236c24d7243b77ad1477e87251acd3
3
+ size 228536930
checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d62848396b464be71e7417b09981050d5f1d377e9e0c4894645772a871d81e
3
+ size 14512
checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a408fa3a85250385243240a3c4f6e6dc4513da1d97421e8faae4c8d880e7d1a
3
+ size 14512
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4549dc1554a9cef5b3b89b84239d0624d2e4f7b2af98173873173c77ff786861
3
+ size 1064
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-1000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
3
+ size 17078292
checkpoint-1000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,1789 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.8888888888888888,
5
+ "eval_steps": 60,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.008888888888888889,
13
+ "grad_norm": 8.760091781616211,
14
+ "learning_rate": 4.4247787610619474e-07,
15
+ "logits/chosen": -0.8248252868652344,
16
+ "logits/rejected": -0.8263720273971558,
17
+ "logps/chosen": -0.36086463928222656,
18
+ "logps/rejected": -5.696224689483643,
19
+ "loss": 1.1038,
20
+ "rewards/accuracies": 0.5125000476837158,
21
+ "rewards/chosen": 17.43745994567871,
22
+ "rewards/margins": 0.5984855890274048,
23
+ "rewards/rejected": 16.838973999023438,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.017777777777777778,
28
+ "grad_norm": 8.855981826782227,
29
+ "learning_rate": 8.849557522123895e-07,
30
+ "logits/chosen": -0.8169006109237671,
31
+ "logits/rejected": -0.819770872592926,
32
+ "logps/chosen": -0.12464660406112671,
33
+ "logps/rejected": -7.139842987060547,
34
+ "loss": 1.1887,
35
+ "rewards/accuracies": 0.4000000059604645,
36
+ "rewards/chosen": 17.17649269104004,
37
+ "rewards/margins": 0.19107049703598022,
38
+ "rewards/rejected": 16.98542022705078,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.02666666666666667,
43
+ "grad_norm": 16.764184951782227,
44
+ "learning_rate": 1.3274336283185843e-06,
45
+ "logits/chosen": -0.8003113865852356,
46
+ "logits/rejected": -0.8030117750167847,
47
+ "logps/chosen": -0.34651467204093933,
48
+ "logps/rejected": -6.967917442321777,
49
+ "loss": 1.0563,
50
+ "rewards/accuracies": 0.44999998807907104,
51
+ "rewards/chosen": 17.280975341796875,
52
+ "rewards/margins": 0.40005987882614136,
53
+ "rewards/rejected": 16.88091468811035,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.035555555555555556,
58
+ "grad_norm": 8.33682918548584,
59
+ "learning_rate": 1.769911504424779e-06,
60
+ "logits/chosen": -0.7695047855377197,
61
+ "logits/rejected": -0.7739207148551941,
62
+ "logps/chosen": -1.5993006229400635,
63
+ "logps/rejected": -8.504932403564453,
64
+ "loss": 0.7596,
65
+ "rewards/accuracies": 0.5,
66
+ "rewards/chosen": 17.283912658691406,
67
+ "rewards/margins": 0.6976072192192078,
68
+ "rewards/rejected": 16.5863037109375,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.044444444444444446,
73
+ "grad_norm": 4.494723320007324,
74
+ "learning_rate": 2.212389380530974e-06,
75
+ "logits/chosen": -0.7154140472412109,
76
+ "logits/rejected": -0.7225576043128967,
77
+ "logps/chosen": -3.112199068069458,
78
+ "logps/rejected": -12.212080001831055,
79
+ "loss": 0.6083,
80
+ "rewards/accuracies": 0.4625000059604645,
81
+ "rewards/chosen": 17.03064727783203,
82
+ "rewards/margins": 0.7148451805114746,
83
+ "rewards/rejected": 16.3158016204834,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.05333333333333334,
88
+ "grad_norm": 5.110287666320801,
89
+ "learning_rate": 2.6548672566371687e-06,
90
+ "logits/chosen": -0.6322453022003174,
91
+ "logits/rejected": -0.6387485265731812,
92
+ "logps/chosen": -5.650620460510254,
93
+ "logps/rejected": -12.759811401367188,
94
+ "loss": 0.3835,
95
+ "rewards/accuracies": 0.9125000238418579,
96
+ "rewards/chosen": 17.101289749145508,
97
+ "rewards/margins": 1.1824612617492676,
98
+ "rewards/rejected": 15.918828964233398,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.05333333333333334,
103
+ "eval_logits/chosen": -0.5826543569564819,
104
+ "eval_logits/rejected": -0.5914276838302612,
105
+ "eval_logps/chosen": -3.5471787452697754,
106
+ "eval_logps/rejected": -16.51181983947754,
107
+ "eval_loss": 0.3286525011062622,
108
+ "eval_rewards/accuracies": 0.9280000925064087,
109
+ "eval_rewards/chosen": 17.148174285888672,
110
+ "eval_rewards/margins": 1.4386365413665771,
111
+ "eval_rewards/rejected": 15.709539413452148,
112
+ "eval_runtime": 372.0227,
113
+ "eval_samples_per_second": 2.688,
114
+ "eval_steps_per_second": 0.336,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.06222222222222222,
119
+ "grad_norm": 5.098133563995361,
120
+ "learning_rate": 3.097345132743363e-06,
121
+ "logits/chosen": -0.5378152132034302,
122
+ "logits/rejected": -0.5494933724403381,
123
+ "logps/chosen": -1.5099802017211914,
124
+ "logps/rejected": -21.206321716308594,
125
+ "loss": 0.2931,
126
+ "rewards/accuracies": 0.9375,
127
+ "rewards/chosen": 17.083791732788086,
128
+ "rewards/margins": 1.5844331979751587,
129
+ "rewards/rejected": 15.499359130859375,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.07111111111111111,
134
+ "grad_norm": 29.787437438964844,
135
+ "learning_rate": 3.539823008849558e-06,
136
+ "logits/chosen": -0.443774938583374,
137
+ "logits/rejected": -0.45571577548980713,
138
+ "logps/chosen": -1.5804342031478882,
139
+ "logps/rejected": -22.606929779052734,
140
+ "loss": 0.202,
141
+ "rewards/accuracies": 0.9750000238418579,
142
+ "rewards/chosen": 17.302125930786133,
143
+ "rewards/margins": 2.174014091491699,
144
+ "rewards/rejected": 15.128110885620117,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 0.08,
149
+ "grad_norm": 23.14398193359375,
150
+ "learning_rate": 3.982300884955752e-06,
151
+ "logits/chosen": -0.3626072406768799,
152
+ "logits/rejected": -0.3787815570831299,
153
+ "logps/chosen": -2.203828811645508,
154
+ "logps/rejected": -29.433551788330078,
155
+ "loss": 0.2123,
156
+ "rewards/accuracies": 0.925000011920929,
157
+ "rewards/chosen": 17.00284194946289,
158
+ "rewards/margins": 2.320391893386841,
159
+ "rewards/rejected": 14.682450294494629,
160
+ "step": 90
161
+ },
162
+ {
163
+ "epoch": 0.08888888888888889,
164
+ "grad_norm": 29.672739028930664,
165
+ "learning_rate": 4.424778761061948e-06,
166
+ "logits/chosen": -0.3035663962364197,
167
+ "logits/rejected": -0.31762221455574036,
168
+ "logps/chosen": -3.433589458465576,
169
+ "logps/rejected": -29.9322509765625,
170
+ "loss": 0.2592,
171
+ "rewards/accuracies": 0.9375,
172
+ "rewards/chosen": 16.929956436157227,
173
+ "rewards/margins": 2.31272029876709,
174
+ "rewards/rejected": 14.617237091064453,
175
+ "step": 100
176
+ },
177
+ {
178
+ "epoch": 0.09777777777777778,
179
+ "grad_norm": 1.873722791671753,
180
+ "learning_rate": 4.867256637168142e-06,
181
+ "logits/chosen": -0.2679600715637207,
182
+ "logits/rejected": -0.2826440930366516,
183
+ "logps/chosen": -0.9653514623641968,
184
+ "logps/rejected": -30.235322952270508,
185
+ "loss": 0.1336,
186
+ "rewards/accuracies": 0.949999988079071,
187
+ "rewards/chosen": 17.462385177612305,
188
+ "rewards/margins": 3.1994175910949707,
189
+ "rewards/rejected": 14.26296615600586,
190
+ "step": 110
191
+ },
192
+ {
193
+ "epoch": 0.10666666666666667,
194
+ "grad_norm": 1.6913721561431885,
195
+ "learning_rate": 4.999409761242696e-06,
196
+ "logits/chosen": -0.22222033143043518,
197
+ "logits/rejected": -0.23720571398735046,
198
+ "logps/chosen": -4.4953508377075195,
199
+ "logps/rejected": -34.074745178222656,
200
+ "loss": 0.2552,
201
+ "rewards/accuracies": 0.8999999761581421,
202
+ "rewards/chosen": 17.04866600036621,
203
+ "rewards/margins": 3.1014418601989746,
204
+ "rewards/rejected": 13.947224617004395,
205
+ "step": 120
206
+ },
207
+ {
208
+ "epoch": 0.10666666666666667,
209
+ "eval_logits/chosen": -0.206527978181839,
210
+ "eval_logits/rejected": -0.22178640961647034,
211
+ "eval_logps/chosen": -3.69442081451416,
212
+ "eval_logps/rejected": -36.072166442871094,
213
+ "eval_loss": 0.18996010720729828,
214
+ "eval_rewards/accuracies": 0.9320000410079956,
215
+ "eval_rewards/chosen": 17.133451461791992,
216
+ "eval_rewards/margins": 3.379946708679199,
217
+ "eval_rewards/rejected": 13.753504753112793,
218
+ "eval_runtime": 361.5279,
219
+ "eval_samples_per_second": 2.766,
220
+ "eval_steps_per_second": 0.346,
221
+ "step": 120
222
+ },
223
+ {
224
+ "epoch": 0.11555555555555555,
225
+ "grad_norm": 61.80262756347656,
226
+ "learning_rate": 4.996519466816778e-06,
227
+ "logits/chosen": -0.18473535776138306,
228
+ "logits/rejected": -0.1988501250743866,
229
+ "logps/chosen": -3.7009687423706055,
230
+ "logps/rejected": -39.289939880371094,
231
+ "loss": 0.1394,
232
+ "rewards/accuracies": 0.9624999761581421,
233
+ "rewards/chosen": 17.106964111328125,
234
+ "rewards/margins": 3.633338212966919,
235
+ "rewards/rejected": 13.473625183105469,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 0.12444444444444444,
240
+ "grad_norm": 1.6732702255249023,
241
+ "learning_rate": 4.9912234871722805e-06,
242
+ "logits/chosen": -0.16134041547775269,
243
+ "logits/rejected": -0.17547868192195892,
244
+ "logps/chosen": -3.0637736320495605,
245
+ "logps/rejected": -40.07548522949219,
246
+ "loss": 0.1408,
247
+ "rewards/accuracies": 0.9750000238418579,
248
+ "rewards/chosen": 17.392223358154297,
249
+ "rewards/margins": 4.242353439331055,
250
+ "rewards/rejected": 13.149867057800293,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.13333333333333333,
255
+ "grad_norm": 0.346453994512558,
256
+ "learning_rate": 4.98352692559805e-06,
257
+ "logits/chosen": -0.13797929883003235,
258
+ "logits/rejected": -0.15283086895942688,
259
+ "logps/chosen": -5.14492130279541,
260
+ "logps/rejected": -47.97212219238281,
261
+ "loss": 0.2153,
262
+ "rewards/accuracies": 0.9375,
263
+ "rewards/chosen": 16.896778106689453,
264
+ "rewards/margins": 4.227695465087891,
265
+ "rewards/rejected": 12.669081687927246,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.14222222222222222,
270
+ "grad_norm": 0.21871662139892578,
271
+ "learning_rate": 4.973437198621237e-06,
272
+ "logits/chosen": -0.12396670132875443,
273
+ "logits/rejected": -0.13780555129051208,
274
+ "logps/chosen": -6.108860015869141,
275
+ "logps/rejected": -54.90739440917969,
276
+ "loss": 0.0388,
277
+ "rewards/accuracies": 0.9750000238418579,
278
+ "rewards/chosen": 16.75935935974121,
279
+ "rewards/margins": 4.755282878875732,
280
+ "rewards/rejected": 12.004077911376953,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 0.1511111111111111,
285
+ "grad_norm": 235.12429809570312,
286
+ "learning_rate": 4.960964028860621e-06,
287
+ "logits/chosen": -0.1140839159488678,
288
+ "logits/rejected": -0.1263057291507721,
289
+ "logps/chosen": -12.605452537536621,
290
+ "logps/rejected": -53.81230926513672,
291
+ "loss": 0.4651,
292
+ "rewards/accuracies": 0.875,
293
+ "rewards/chosen": 16.101238250732422,
294
+ "rewards/margins": 3.9864249229431152,
295
+ "rewards/rejected": 12.114812850952148,
296
+ "step": 170
297
+ },
298
+ {
299
+ "epoch": 0.16,
300
+ "grad_norm": 190.97048950195312,
301
+ "learning_rate": 4.946119435657738e-06,
302
+ "logits/chosen": -0.10746976733207703,
303
+ "logits/rejected": -0.11878640949726105,
304
+ "logps/chosen": -8.5105562210083,
305
+ "logps/rejected": -51.314781188964844,
306
+ "loss": 0.2362,
307
+ "rewards/accuracies": 0.925000011920929,
308
+ "rewards/chosen": 16.719980239868164,
309
+ "rewards/margins": 4.549674034118652,
310
+ "rewards/rejected": 12.170306205749512,
311
+ "step": 180
312
+ },
313
+ {
314
+ "epoch": 0.16,
315
+ "eval_logits/chosen": -0.10870806127786636,
316
+ "eval_logits/rejected": -0.12223993986845016,
317
+ "eval_logps/chosen": -4.414996147155762,
318
+ "eval_logps/rejected": -53.885032653808594,
319
+ "eval_loss": 0.20236633718013763,
320
+ "eval_rewards/accuracies": 0.9510000944137573,
321
+ "eval_rewards/chosen": 17.06139373779297,
322
+ "eval_rewards/margins": 5.089176177978516,
323
+ "eval_rewards/rejected": 11.97221851348877,
324
+ "eval_runtime": 361.4355,
325
+ "eval_samples_per_second": 2.767,
326
+ "eval_steps_per_second": 0.346,
327
+ "step": 180
328
+ },
329
+ {
330
+ "epoch": 0.1688888888888889,
331
+ "grad_norm": 56.81266784667969,
332
+ "learning_rate": 4.928917723494854e-06,
333
+ "logits/chosen": -0.10682469606399536,
334
+ "logits/rejected": -0.12124393880367279,
335
+ "logps/chosen": -3.058413028717041,
336
+ "logps/rejected": -55.052528381347656,
337
+ "loss": 0.2442,
338
+ "rewards/accuracies": 0.9500000476837158,
339
+ "rewards/chosen": 17.058589935302734,
340
+ "rewards/margins": 5.056097984313965,
341
+ "rewards/rejected": 12.002490043640137,
342
+ "step": 190
343
+ },
344
+ {
345
+ "epoch": 0.17777777777777778,
346
+ "grad_norm": 175.06552124023438,
347
+ "learning_rate": 4.909375468210947e-06,
348
+ "logits/chosen": -0.10520349442958832,
349
+ "logits/rejected": -0.12018950283527374,
350
+ "logps/chosen": -4.114959716796875,
351
+ "logps/rejected": -55.9394645690918,
352
+ "loss": 0.1915,
353
+ "rewards/accuracies": 0.9500000476837158,
354
+ "rewards/chosen": 16.98603057861328,
355
+ "rewards/margins": 5.105838775634766,
356
+ "rewards/rejected": 11.880191802978516,
357
+ "step": 200
358
+ },
359
+ {
360
+ "epoch": 0.18666666666666668,
361
+ "grad_norm": 78.06558990478516,
362
+ "learning_rate": 4.8875115010289655e-06,
363
+ "logits/chosen": -0.10475558042526245,
364
+ "logits/rejected": -0.11949175596237183,
365
+ "logps/chosen": -6.760301113128662,
366
+ "logps/rejected": -53.91607666015625,
367
+ "loss": 0.2843,
368
+ "rewards/accuracies": 0.9375,
369
+ "rewards/chosen": 16.857545852661133,
370
+ "rewards/margins": 4.917357921600342,
371
+ "rewards/rejected": 11.94018840789795,
372
+ "step": 210
373
+ },
374
+ {
375
+ "epoch": 0.19555555555555557,
376
+ "grad_norm": 15.880486488342285,
377
+ "learning_rate": 4.863346890409768e-06,
378
+ "logits/chosen": -0.11213523149490356,
379
+ "logits/rejected": -0.12581588327884674,
380
+ "logps/chosen": -6.759585380554199,
381
+ "logps/rejected": -51.10936737060547,
382
+ "loss": 0.5104,
383
+ "rewards/accuracies": 0.875,
384
+ "rewards/chosen": 16.859071731567383,
385
+ "rewards/margins": 4.638372898101807,
386
+ "rewards/rejected": 12.220698356628418,
387
+ "step": 220
388
+ },
389
+ {
390
+ "epoch": 0.20444444444444446,
391
+ "grad_norm": 46.97845458984375,
392
+ "learning_rate": 4.836904921750224e-06,
393
+ "logits/chosen": -0.11947059631347656,
394
+ "logits/rejected": -0.1329912692308426,
395
+ "logps/chosen": -3.608184814453125,
396
+ "logps/rejected": -48.794761657714844,
397
+ "loss": 0.2134,
398
+ "rewards/accuracies": 0.925000011920929,
399
+ "rewards/chosen": 17.235904693603516,
400
+ "rewards/margins": 4.859888076782227,
401
+ "rewards/rejected": 12.376014709472656,
402
+ "step": 230
403
+ },
404
+ {
405
+ "epoch": 0.21333333333333335,
406
+ "grad_norm": 24.032859802246094,
407
+ "learning_rate": 4.808211074945042e-06,
408
+ "logits/chosen": -0.1200513243675232,
409
+ "logits/rejected": -0.1333036869764328,
410
+ "logps/chosen": -3.7552154064178467,
411
+ "logps/rejected": -49.87453079223633,
412
+ "loss": 0.1781,
413
+ "rewards/accuracies": 0.9500000476837158,
414
+ "rewards/chosen": 17.094650268554688,
415
+ "rewards/margins": 4.68077278137207,
416
+ "rewards/rejected": 12.41387939453125,
417
+ "step": 240
418
+ },
419
+ {
420
+ "epoch": 0.21333333333333335,
421
+ "eval_logits/chosen": -0.12433278560638428,
422
+ "eval_logits/rejected": -0.13808581233024597,
423
+ "eval_logps/chosen": -4.408891201019287,
424
+ "eval_logps/rejected": -50.744781494140625,
425
+ "eval_loss": 0.1546352356672287,
426
+ "eval_rewards/accuracies": 0.9500000476837158,
427
+ "eval_rewards/chosen": 17.06200408935547,
428
+ "eval_rewards/margins": 4.775761604309082,
429
+ "eval_rewards/rejected": 12.286243438720703,
430
+ "eval_runtime": 361.4974,
431
+ "eval_samples_per_second": 2.766,
432
+ "eval_steps_per_second": 0.346,
433
+ "step": 240
434
+ },
435
+ {
436
+ "epoch": 0.2222222222222222,
437
+ "grad_norm": 0.25737640261650085,
438
+ "learning_rate": 4.7772929998339485e-06,
439
+ "logits/chosen": -0.12348780035972595,
440
+ "logits/rejected": -0.13704943656921387,
441
+ "logps/chosen": -4.4299187660217285,
442
+ "logps/rejected": -53.074607849121094,
443
+ "loss": 0.1373,
444
+ "rewards/accuracies": 0.9375,
445
+ "rewards/chosen": 17.087068557739258,
446
+ "rewards/margins": 5.06691837310791,
447
+ "rewards/rejected": 12.020149230957031,
448
+ "step": 250
449
+ },
450
+ {
451
+ "epoch": 0.2311111111111111,
452
+ "grad_norm": 0.1839389204978943,
453
+ "learning_rate": 4.744180489557859e-06,
454
+ "logits/chosen": -0.12177034467458725,
455
+ "logits/rejected": -0.1342695653438568,
456
+ "logps/chosen": -3.775188446044922,
457
+ "logps/rejected": -53.98720932006836,
458
+ "loss": 0.1896,
459
+ "rewards/accuracies": 0.949999988079071,
460
+ "rewards/chosen": 17.12021255493164,
461
+ "rewards/margins": 5.148064613342285,
462
+ "rewards/rejected": 11.972146987915039,
463
+ "step": 260
464
+ },
465
+ {
466
+ "epoch": 0.24,
467
+ "grad_norm": 12.258485794067383,
468
+ "learning_rate": 4.708905451849754e-06,
469
+ "logits/chosen": -0.11067859083414078,
470
+ "logits/rejected": -0.12377731502056122,
471
+ "logps/chosen": -6.418317794799805,
472
+ "logps/rejected": -56.57402801513672,
473
+ "loss": 0.2315,
474
+ "rewards/accuracies": 0.9375,
475
+ "rewards/chosen": 16.738832473754883,
476
+ "rewards/margins": 4.884931564331055,
477
+ "rewards/rejected": 11.853900909423828,
478
+ "step": 270
479
+ },
480
+ {
481
+ "epoch": 0.24888888888888888,
482
+ "grad_norm": 77.56194305419922,
483
+ "learning_rate": 4.671501878287879e-06,
484
+ "logits/chosen": -0.1184445172548294,
485
+ "logits/rejected": -0.1339874565601349,
486
+ "logps/chosen": -10.12116527557373,
487
+ "logps/rejected": -53.403907775878906,
488
+ "loss": 0.5343,
489
+ "rewards/accuracies": 0.862500011920929,
490
+ "rewards/chosen": 16.458633422851562,
491
+ "rewards/margins": 4.402472496032715,
492
+ "rewards/rejected": 12.056160926818848,
493
+ "step": 280
494
+ },
495
+ {
496
+ "epoch": 0.2577777777777778,
497
+ "grad_norm": 67.53883361816406,
498
+ "learning_rate": 4.6320058115409295e-06,
499
+ "logits/chosen": -0.1448262631893158,
500
+ "logits/rejected": -0.15793387591838837,
501
+ "logps/chosen": -3.4666190147399902,
502
+ "logps/rejected": -48.79213333129883,
503
+ "loss": 0.5017,
504
+ "rewards/accuracies": 0.887499988079071,
505
+ "rewards/chosen": 16.945899963378906,
506
+ "rewards/margins": 4.2686333656311035,
507
+ "rewards/rejected": 12.677268028259277,
508
+ "step": 290
509
+ },
510
+ {
511
+ "epoch": 0.26666666666666666,
512
+ "grad_norm": 0.17521341145038605,
513
+ "learning_rate": 4.590455310636778e-06,
514
+ "logits/chosen": -0.16128253936767578,
515
+ "logits/rejected": -0.17375555634498596,
516
+ "logps/chosen": -2.9032950401306152,
517
+ "logps/rejected": -47.69734191894531,
518
+ "loss": 0.265,
519
+ "rewards/accuracies": 0.925000011920929,
520
+ "rewards/chosen": 17.18383026123047,
521
+ "rewards/margins": 4.541309356689453,
522
+ "rewards/rejected": 12.642518997192383,
523
+ "step": 300
524
+ },
525
+ {
526
+ "epoch": 0.26666666666666666,
527
+ "eval_logits/chosen": -0.17444846034049988,
528
+ "eval_logits/rejected": -0.18559777736663818,
529
+ "eval_logps/chosen": -2.535512924194336,
530
+ "eval_logps/rejected": -47.16367721557617,
531
+ "eval_loss": 0.15360687673091888,
532
+ "eval_rewards/accuracies": 0.9440000653266907,
533
+ "eval_rewards/chosen": 17.249343872070312,
534
+ "eval_rewards/margins": 4.604989051818848,
535
+ "eval_rewards/rejected": 12.644353866577148,
536
+ "eval_runtime": 361.4575,
537
+ "eval_samples_per_second": 2.767,
538
+ "eval_steps_per_second": 0.346,
539
+ "step": 300
540
+ },
541
+ {
542
+ "epoch": 0.27555555555555555,
543
+ "grad_norm": 0.5040452480316162,
544
+ "learning_rate": 4.54689041428819e-06,
545
+ "logits/chosen": -0.16974106431007385,
546
+ "logits/rejected": -0.1810058057308197,
547
+ "logps/chosen": -1.233938217163086,
548
+ "logps/rejected": -49.907745361328125,
549
+ "loss": 0.1132,
550
+ "rewards/accuracies": 0.9500000476837158,
551
+ "rewards/chosen": 17.34117889404297,
552
+ "rewards/margins": 4.934173583984375,
553
+ "rewards/rejected": 12.407005310058594,
554
+ "step": 310
555
+ },
556
+ {
557
+ "epoch": 0.28444444444444444,
558
+ "grad_norm": 100.02949523925781,
559
+ "learning_rate": 4.501353102310901e-06,
560
+ "logits/chosen": -0.15705889463424683,
561
+ "logits/rejected": -0.1695334017276764,
562
+ "logps/chosen": -1.0820492506027222,
563
+ "logps/rejected": -52.577110290527344,
564
+ "loss": 0.1194,
565
+ "rewards/accuracies": 0.9500000476837158,
566
+ "rewards/chosen": 17.33388900756836,
567
+ "rewards/margins": 5.154760837554932,
568
+ "rewards/rejected": 12.179126739501953,
569
+ "step": 320
570
+ },
571
+ {
572
+ "epoch": 0.29333333333333333,
573
+ "grad_norm": 0.2689219117164612,
574
+ "learning_rate": 4.453887255171206e-06,
575
+ "logits/chosen": -0.13849371671676636,
576
+ "logits/rejected": -0.14990833401679993,
577
+ "logps/chosen": -1.8435032367706299,
578
+ "logps/rejected": -54.79044723510742,
579
+ "loss": 0.0926,
580
+ "rewards/accuracies": 0.9500000476837158,
581
+ "rewards/chosen": 17.2423095703125,
582
+ "rewards/margins": 5.28987979888916,
583
+ "rewards/rejected": 11.952428817749023,
584
+ "step": 330
585
+ },
586
+ {
587
+ "epoch": 0.3022222222222222,
588
+ "grad_norm": 0.09305431693792343,
589
+ "learning_rate": 4.404538611702055e-06,
590
+ "logits/chosen": -0.12299702316522598,
591
+ "logits/rejected": -0.13453055918216705,
592
+ "logps/chosen": -2.9897143840789795,
593
+ "logps/rejected": -52.954498291015625,
594
+ "loss": 0.2873,
595
+ "rewards/accuracies": 0.925000011920929,
596
+ "rewards/chosen": 17.17474365234375,
597
+ "rewards/margins": 5.071004867553711,
598
+ "rewards/rejected": 12.103739738464355,
599
+ "step": 340
600
+ },
601
+ {
602
+ "epoch": 0.3111111111111111,
603
+ "grad_norm": 59.282073974609375,
604
+ "learning_rate": 4.3533547250284015e-06,
605
+ "logits/chosen": -0.11913029849529266,
606
+ "logits/rejected": -0.12785324454307556,
607
+ "logps/chosen": -3.9456872940063477,
608
+ "logps/rejected": -48.68487548828125,
609
+ "loss": 0.4332,
610
+ "rewards/accuracies": 0.875,
611
+ "rewards/chosen": 17.12805938720703,
612
+ "rewards/margins": 4.669450283050537,
613
+ "rewards/rejected": 12.458610534667969,
614
+ "step": 350
615
+ },
616
+ {
617
+ "epoch": 0.32,
618
+ "grad_norm": 0.31101909279823303,
619
+ "learning_rate": 4.300384916744261e-06,
620
+ "logits/chosen": -0.11280188709497452,
621
+ "logits/rejected": -0.12300585210323334,
622
+ "logps/chosen": -2.1714723110198975,
623
+ "logps/rejected": -54.74174118041992,
624
+ "loss": 0.1605,
625
+ "rewards/accuracies": 0.9500000476837158,
626
+ "rewards/chosen": 17.326162338256836,
627
+ "rewards/margins": 5.467062473297119,
628
+ "rewards/rejected": 11.859098434448242,
629
+ "step": 360
630
+ },
631
+ {
632
+ "epoch": 0.32,
633
+ "eval_logits/chosen": -0.10620756447315216,
634
+ "eval_logits/rejected": -0.11727114766836166,
635
+ "eval_logps/chosen": -1.4165427684783936,
636
+ "eval_logps/rejected": -50.9525146484375,
637
+ "eval_loss": 0.3194349706172943,
638
+ "eval_rewards/accuracies": 0.9210000038146973,
639
+ "eval_rewards/chosen": 17.36124038696289,
640
+ "eval_rewards/margins": 5.095769882202148,
641
+ "eval_rewards/rejected": 12.26546859741211,
642
+ "eval_runtime": 361.5072,
643
+ "eval_samples_per_second": 2.766,
644
+ "eval_steps_per_second": 0.346,
645
+ "step": 360
646
+ },
647
+ {
648
+ "epoch": 0.3288888888888889,
649
+ "grad_norm": 6.1126532554626465,
650
+ "learning_rate": 4.24568022938566e-06,
651
+ "logits/chosen": -0.10354311764240265,
652
+ "logits/rejected": -0.11526636779308319,
653
+ "logps/chosen": -1.2935255765914917,
654
+ "logps/rejected": -55.57566833496094,
655
+ "loss": 0.1711,
656
+ "rewards/accuracies": 0.9500000476837158,
657
+ "rewards/chosen": 17.439346313476562,
658
+ "rewards/margins": 5.700921058654785,
659
+ "rewards/rejected": 11.738424301147461,
660
+ "step": 370
661
+ },
662
+ {
663
+ "epoch": 0.3377777777777778,
664
+ "grad_norm": 34.15927505493164,
665
+ "learning_rate": 4.189293377245241e-06,
666
+ "logits/chosen": -0.1029932051897049,
667
+ "logits/rejected": -0.11382515728473663,
668
+ "logps/chosen": -2.5132687091827393,
669
+ "logps/rejected": -55.50346374511719,
670
+ "loss": 0.4359,
671
+ "rewards/accuracies": 0.8875000476837158,
672
+ "rewards/chosen": 16.731037139892578,
673
+ "rewards/margins": 4.368172645568848,
674
+ "rewards/rejected": 12.362865447998047,
675
+ "step": 380
676
+ },
677
+ {
678
+ "epoch": 0.3466666666666667,
679
+ "grad_norm": 2.8422904014587402,
680
+ "learning_rate": 4.131278695575952e-06,
681
+ "logits/chosen": -0.10793520510196686,
682
+ "logits/rejected": -0.12109285593032837,
683
+ "logps/chosen": -3.014652729034424,
684
+ "logps/rejected": -53.98411560058594,
685
+ "loss": 0.2161,
686
+ "rewards/accuracies": 0.949999988079071,
687
+ "rewards/chosen": 17.137393951416016,
688
+ "rewards/margins": 5.105995178222656,
689
+ "rewards/rejected": 12.03139877319336,
690
+ "step": 390
691
+ },
692
+ {
693
+ "epoch": 0.35555555555555557,
694
+ "grad_norm": 54.0329475402832,
695
+ "learning_rate": 4.071692088232743e-06,
696
+ "logits/chosen": -0.10393750667572021,
697
+ "logits/rejected": -0.11834606528282166,
698
+ "logps/chosen": -2.1508543491363525,
699
+ "logps/rejected": -45.60733413696289,
700
+ "loss": 0.2077,
701
+ "rewards/accuracies": 0.925000011920929,
702
+ "rewards/chosen": 17.586124420166016,
703
+ "rewards/margins": 5.077212333679199,
704
+ "rewards/rejected": 12.5089111328125,
705
+ "step": 400
706
+ },
707
+ {
708
+ "epoch": 0.36444444444444446,
709
+ "grad_norm": 81.61144256591797,
710
+ "learning_rate": 4.010590973802737e-06,
711
+ "logits/chosen": -0.09564584493637085,
712
+ "logits/rejected": -0.10617707669734955,
713
+ "logps/chosen": -3.4572842121124268,
714
+ "logps/rejected": -50.92162322998047,
715
+ "loss": 0.2478,
716
+ "rewards/accuracies": 0.8875000476837158,
717
+ "rewards/chosen": 17.010910034179688,
718
+ "rewards/margins": 4.556198596954346,
719
+ "rewards/rejected": 12.454713821411133,
720
+ "step": 410
721
+ },
722
+ {
723
+ "epoch": 0.37333333333333335,
724
+ "grad_norm": 0.30974289774894714,
725
+ "learning_rate": 3.948034230275781e-06,
726
+ "logits/chosen": -0.09134417027235031,
727
+ "logits/rejected": -0.1020016297698021,
728
+ "logps/chosen": -5.046698570251465,
729
+ "logps/rejected": -48.908958435058594,
730
+ "loss": 0.2894,
731
+ "rewards/accuracies": 0.8999999761581421,
732
+ "rewards/chosen": 17.007888793945312,
733
+ "rewards/margins": 4.53641414642334,
734
+ "rewards/rejected": 12.471475601196289,
735
+ "step": 420
736
+ },
737
+ {
738
+ "epoch": 0.37333333333333335,
739
+ "eval_logits/chosen": -0.09054450690746307,
740
+ "eval_logits/rejected": -0.10264354199171066,
741
+ "eval_logps/chosen": -1.913105845451355,
742
+ "eval_logps/rejected": -51.11127471923828,
743
+ "eval_loss": 0.16789735853672028,
744
+ "eval_rewards/accuracies": 0.9450000524520874,
745
+ "eval_rewards/chosen": 17.311582565307617,
746
+ "eval_rewards/margins": 5.061989784240723,
747
+ "eval_rewards/rejected": 12.249593734741211,
748
+ "eval_runtime": 361.5337,
749
+ "eval_samples_per_second": 2.766,
750
+ "eval_steps_per_second": 0.346,
751
+ "step": 420
752
+ },
753
+ {
754
+ "epoch": 0.38222222222222224,
755
+ "grad_norm": 12.824393272399902,
756
+ "learning_rate": 3.884082138308699e-06,
757
+ "logits/chosen": -0.08666776865720749,
758
+ "logits/rejected": -0.0997733399271965,
759
+ "logps/chosen": -1.7306327819824219,
760
+ "logps/rejected": -54.273292541503906,
761
+ "loss": 0.2298,
762
+ "rewards/accuracies": 0.9500000476837158,
763
+ "rewards/chosen": 17.167621612548828,
764
+ "rewards/margins": 5.065673351287842,
765
+ "rewards/rejected": 12.101947784423828,
766
+ "step": 430
767
+ },
768
+ {
769
+ "epoch": 0.39111111111111113,
770
+ "grad_norm": 0.30713599920272827,
771
+ "learning_rate": 3.818796323137896e-06,
772
+ "logits/chosen": -0.09174907952547073,
773
+ "logits/rejected": -0.10376611351966858,
774
+ "logps/chosen": -1.489154577255249,
775
+ "logps/rejected": -54.580726623535156,
776
+ "loss": 0.2513,
777
+ "rewards/accuracies": 0.9375,
778
+ "rewards/chosen": 17.22280502319336,
779
+ "rewards/margins": 5.175349235534668,
780
+ "rewards/rejected": 12.047454833984375,
781
+ "step": 440
782
+ },
783
+ {
784
+ "epoch": 0.4,
785
+ "grad_norm": 87.4791488647461,
786
+ "learning_rate": 3.7522396951963303e-06,
787
+ "logits/chosen": -0.09688778221607208,
788
+ "logits/rejected": -0.10897806286811829,
789
+ "logps/chosen": -3.157695770263672,
790
+ "logps/rejected": -50.96417236328125,
791
+ "loss": 0.1758,
792
+ "rewards/accuracies": 0.9500000476837158,
793
+ "rewards/chosen": 17.345651626586914,
794
+ "rewards/margins": 5.245656967163086,
795
+ "rewards/rejected": 12.099993705749512,
796
+ "step": 450
797
+ },
798
+ {
799
+ "epoch": 0.4088888888888889,
800
+ "grad_norm": 146.2008056640625,
801
+ "learning_rate": 3.684476389492026e-06,
802
+ "logits/chosen": -0.09378582239151001,
803
+ "logits/rejected": -0.10475654900074005,
804
+ "logps/chosen": -0.5611928701400757,
805
+ "logps/rejected": -56.518890380859375,
806
+ "loss": 0.1981,
807
+ "rewards/accuracies": 0.9500000476837158,
808
+ "rewards/chosen": 17.113712310791016,
809
+ "rewards/margins": 5.068872928619385,
810
+ "rewards/rejected": 12.044839859008789,
811
+ "step": 460
812
+ },
813
+ {
814
+ "epoch": 0.4177777777777778,
815
+ "grad_norm": 1.9137721061706543,
816
+ "learning_rate": 3.6155717038065783e-06,
817
+ "logits/chosen": -0.08695463836193085,
818
+ "logits/rejected": -0.09596743434667587,
819
+ "logps/chosen": -1.5298550128936768,
820
+ "logps/rejected": -50.27445983886719,
821
+ "loss": 0.2066,
822
+ "rewards/accuracies": 0.9375,
823
+ "rewards/chosen": 17.35186004638672,
824
+ "rewards/margins": 5.014693260192871,
825
+ "rewards/rejected": 12.337167739868164,
826
+ "step": 470
827
+ },
828
+ {
829
+ "epoch": 0.4266666666666667,
830
+ "grad_norm": 84.80391693115234,
831
+ "learning_rate": 3.545592035773192e-06,
832
+ "logits/chosen": -0.0746893435716629,
833
+ "logits/rejected": -0.08653923869132996,
834
+ "logps/chosen": -2.0052125453948975,
835
+ "logps/rejected": -57.502811431884766,
836
+ "loss": 0.1149,
837
+ "rewards/accuracies": 0.9500000476837158,
838
+ "rewards/chosen": 17.14373016357422,
839
+ "rewards/margins": 5.360415935516357,
840
+ "rewards/rejected": 11.783313751220703,
841
+ "step": 480
842
+ },
843
+ {
844
+ "epoch": 0.4266666666666667,
845
+ "eval_logits/chosen": -0.07700399309396744,
846
+ "eval_logits/rejected": -0.08828537166118622,
847
+ "eval_logps/chosen": -4.48896598815918,
848
+ "eval_logps/rejected": -53.76282501220703,
849
+ "eval_loss": 0.29511645436286926,
850
+ "eval_rewards/accuracies": 0.9230000376701355,
851
+ "eval_rewards/chosen": 17.053997039794922,
852
+ "eval_rewards/margins": 5.069558143615723,
853
+ "eval_rewards/rejected": 11.984437942504883,
854
+ "eval_runtime": 361.5035,
855
+ "eval_samples_per_second": 2.766,
856
+ "eval_steps_per_second": 0.346,
857
+ "step": 480
858
+ },
859
+ {
860
+ "epoch": 0.43555555555555553,
861
+ "grad_norm": 82.9616470336914,
862
+ "learning_rate": 3.4746048188948806e-06,
863
+ "logits/chosen": -0.06675051152706146,
864
+ "logits/rejected": -0.07860895991325378,
865
+ "logps/chosen": -4.162237167358398,
866
+ "logps/rejected": -54.77789306640625,
867
+ "loss": 0.2979,
868
+ "rewards/accuracies": 0.9125000238418579,
869
+ "rewards/chosen": 17.047603607177734,
870
+ "rewards/margins": 5.138361930847168,
871
+ "rewards/rejected": 11.909242630004883,
872
+ "step": 490
873
+ },
874
+ {
875
+ "epoch": 0.4444444444444444,
876
+ "grad_norm": 0.04293210059404373,
877
+ "learning_rate": 3.4026784575644887e-06,
878
+ "logits/chosen": -0.06424491107463837,
879
+ "logits/rejected": -0.07567107677459717,
880
+ "logps/chosen": -2.05729603767395,
881
+ "logps/rejected": -56.646087646484375,
882
+ "loss": 0.4378,
883
+ "rewards/accuracies": 0.8875000476837158,
884
+ "rewards/chosen": 16.947803497314453,
885
+ "rewards/margins": 4.919981956481934,
886
+ "rewards/rejected": 12.02782154083252,
887
+ "step": 500
888
+ },
889
+ {
890
+ "epoch": 0.4533333333333333,
891
+ "grad_norm": 0.07136644423007965,
892
+ "learning_rate": 3.329882261149148e-06,
893
+ "logits/chosen": -0.06423303484916687,
894
+ "logits/rejected": -0.07512776553630829,
895
+ "logps/chosen": -3.1519265174865723,
896
+ "logps/rejected": -53.53008270263672,
897
+ "loss": 0.2613,
898
+ "rewards/accuracies": 0.9375,
899
+ "rewards/chosen": 17.112144470214844,
900
+ "rewards/margins": 5.046430587768555,
901
+ "rewards/rejected": 12.065712928771973,
902
+ "step": 510
903
+ },
904
+ {
905
+ "epoch": 0.4622222222222222,
906
+ "grad_norm": 0.29279613494873047,
907
+ "learning_rate": 3.25628637720269e-06,
908
+ "logits/chosen": -0.060233693569898605,
909
+ "logits/rejected": -0.07076811790466309,
910
+ "logps/chosen": -1.2358124256134033,
911
+ "logps/rejected": -50.61806106567383,
912
+ "loss": 0.1517,
913
+ "rewards/accuracies": 0.9375,
914
+ "rewards/chosen": 17.457351684570312,
915
+ "rewards/margins": 5.2062835693359375,
916
+ "rewards/rejected": 12.251070022583008,
917
+ "step": 520
918
+ },
919
+ {
920
+ "epoch": 0.4711111111111111,
921
+ "grad_norm": 0.7946074604988098,
922
+ "learning_rate": 3.181961723870359e-06,
923
+ "logits/chosen": -0.054482050240039825,
924
+ "logits/rejected": -0.0657092034816742,
925
+ "logps/chosen": -0.7682158946990967,
926
+ "logps/rejected": -58.81409454345703,
927
+ "loss": 0.2304,
928
+ "rewards/accuracies": 0.9375,
929
+ "rewards/chosen": 17.075973510742188,
930
+ "rewards/margins": 5.222441673278809,
931
+ "rewards/rejected": 11.853530883789062,
932
+ "step": 530
933
+ },
934
+ {
935
+ "epoch": 0.48,
936
+ "grad_norm": 3.1405210494995117,
937
+ "learning_rate": 3.1069799215509847e-06,
938
+ "logits/chosen": -0.05050881579518318,
939
+ "logits/rejected": -0.061149902641773224,
940
+ "logps/chosen": -0.954046368598938,
941
+ "logps/rejected": -52.70227813720703,
942
+ "loss": 0.0384,
943
+ "rewards/accuracies": 0.987500011920929,
944
+ "rewards/chosen": 17.378849029541016,
945
+ "rewards/margins": 5.253483295440674,
946
+ "rewards/rejected": 12.125364303588867,
947
+ "step": 540
948
+ },
949
+ {
950
+ "epoch": 0.48,
951
+ "eval_logits/chosen": -0.05120665580034256,
952
+ "eval_logits/rejected": -0.06123337894678116,
953
+ "eval_logps/chosen": -2.9872913360595703,
954
+ "eval_logps/rejected": -52.27314758300781,
955
+ "eval_loss": 0.17387841641902924,
956
+ "eval_rewards/accuracies": 0.9490000605583191,
957
+ "eval_rewards/chosen": 17.204164505004883,
958
+ "eval_rewards/margins": 5.070757865905762,
959
+ "eval_rewards/rejected": 12.133406639099121,
960
+ "eval_runtime": 361.5449,
961
+ "eval_samples_per_second": 2.766,
962
+ "eval_steps_per_second": 0.346,
963
+ "step": 540
964
+ },
965
+ {
966
+ "epoch": 0.4888888888888889,
967
+ "grad_norm": 0.07270358502864838,
968
+ "learning_rate": 3.0314132238824416e-06,
969
+ "logits/chosen": -0.05125313252210617,
970
+ "logits/rejected": -0.06174170970916748,
971
+ "logps/chosen": -3.2163877487182617,
972
+ "logps/rejected": -47.79279327392578,
973
+ "loss": 0.2087,
974
+ "rewards/accuracies": 0.9125000238418579,
975
+ "rewards/chosen": 17.408517837524414,
976
+ "rewards/margins": 5.061524391174316,
977
+ "rewards/rejected": 12.346992492675781,
978
+ "step": 550
979
+ },
980
+ {
981
+ "epoch": 0.49777777777777776,
982
+ "grad_norm": 0.10005924850702286,
983
+ "learning_rate": 2.955334448116915e-06,
984
+ "logits/chosen": -0.041773442178964615,
985
+ "logits/rejected": -0.05364570394158363,
986
+ "logps/chosen": -0.363404780626297,
987
+ "logps/rejected": -56.32415008544922,
988
+ "loss": 0.0969,
989
+ "rewards/accuracies": 0.9624999761581421,
990
+ "rewards/chosen": 17.3824462890625,
991
+ "rewards/margins": 5.5404510498046875,
992
+ "rewards/rejected": 11.841995239257812,
993
+ "step": 560
994
+ },
995
+ {
996
+ "epoch": 0.5066666666666667,
997
+ "grad_norm": 7.818356990814209,
998
+ "learning_rate": 2.8788169049530533e-06,
999
+ "logits/chosen": -0.04309462010860443,
1000
+ "logits/rejected": -0.05494442582130432,
1001
+ "logps/chosen": -2.2242724895477295,
1002
+ "logps/rejected": -56.444740295410156,
1003
+ "loss": 0.1447,
1004
+ "rewards/accuracies": 0.949999988079071,
1005
+ "rewards/chosen": 17.275390625,
1006
+ "rewards/margins": 5.55007791519165,
1007
+ "rewards/rejected": 11.725313186645508,
1008
+ "step": 570
1009
+ },
1010
+ {
1011
+ "epoch": 0.5155555555555555,
1012
+ "grad_norm": 0.03519747406244278,
1013
+ "learning_rate": 2.8019343278926397e-06,
1014
+ "logits/chosen": -0.03506368771195412,
1015
+ "logits/rejected": -0.046854715794324875,
1016
+ "logps/chosen": -0.5200096964836121,
1017
+ "logps/rejected": -59.05330276489258,
1018
+ "loss": 0.0998,
1019
+ "rewards/accuracies": 0.949999988079071,
1020
+ "rewards/chosen": 17.320554733276367,
1021
+ "rewards/margins": 5.727260589599609,
1022
+ "rewards/rejected": 11.593294143676758,
1023
+ "step": 580
1024
+ },
1025
+ {
1026
+ "epoch": 0.5244444444444445,
1027
+ "grad_norm": 0.04519123584032059,
1028
+ "learning_rate": 2.7247608021898265e-06,
1029
+ "logits/chosen": -0.03204537555575371,
1030
+ "logits/rejected": -0.04383891448378563,
1031
+ "logps/chosen": -1.1271060705184937,
1032
+ "logps/rejected": -59.308895111083984,
1033
+ "loss": 0.1162,
1034
+ "rewards/accuracies": 0.9750000238418579,
1035
+ "rewards/chosen": 17.256351470947266,
1036
+ "rewards/margins": 5.6881890296936035,
1037
+ "rewards/rejected": 11.568161010742188,
1038
+ "step": 590
1039
+ },
1040
+ {
1041
+ "epoch": 0.5333333333333333,
1042
+ "grad_norm": 0.028689857572317123,
1043
+ "learning_rate": 2.647370693461432e-06,
1044
+ "logits/chosen": -0.02834726870059967,
1045
+ "logits/rejected": -0.03827046602964401,
1046
+ "logps/chosen": -5.673943519592285,
1047
+ "logps/rejected": -55.72624588012695,
1048
+ "loss": 0.4008,
1049
+ "rewards/accuracies": 0.8875000476837158,
1050
+ "rewards/chosen": 16.761056900024414,
1051
+ "rewards/margins": 4.804043769836426,
1052
+ "rewards/rejected": 11.957012176513672,
1053
+ "step": 600
1054
+ },
1055
+ {
1056
+ "epoch": 0.5333333333333333,
1057
+ "eval_logits/chosen": -0.035840023308992386,
1058
+ "eval_logits/rejected": -0.04687971621751785,
1059
+ "eval_logps/chosen": -2.1760435104370117,
1060
+ "eval_logps/rejected": -56.62664031982422,
1061
+ "eval_loss": 0.17061151564121246,
1062
+ "eval_rewards/accuracies": 0.9470000267028809,
1063
+ "eval_rewards/chosen": 17.285289764404297,
1064
+ "eval_rewards/margins": 5.587231636047363,
1065
+ "eval_rewards/rejected": 11.698057174682617,
1066
+ "eval_runtime": 361.5056,
1067
+ "eval_samples_per_second": 2.766,
1068
+ "eval_steps_per_second": 0.346,
1069
+ "step": 600
1070
+ },
1071
+ {
1072
+ "epoch": 0.5422222222222223,
1073
+ "grad_norm": 1.1994622945785522,
1074
+ "learning_rate": 2.569838576027068e-06,
1075
+ "logits/chosen": -0.03231767192482948,
1076
+ "logits/rejected": -0.04397805407643318,
1077
+ "logps/chosen": -1.5904741287231445,
1078
+ "logps/rejected": -62.10520553588867,
1079
+ "loss": 0.1265,
1080
+ "rewards/accuracies": 0.9624999761581421,
1081
+ "rewards/chosen": 17.11206817626953,
1082
+ "rewards/margins": 5.734784126281738,
1083
+ "rewards/rejected": 11.37728500366211,
1084
+ "step": 610
1085
+ },
1086
+ {
1087
+ "epoch": 0.5511111111111111,
1088
+ "grad_norm": 0.14163845777511597,
1089
+ "learning_rate": 2.4922391610481544e-06,
1090
+ "logits/chosen": -0.03293662518262863,
1091
+ "logits/rejected": -0.04335154965519905,
1092
+ "logps/chosen": -1.3568997383117676,
1093
+ "logps/rejected": -58.347434997558594,
1094
+ "loss": 0.097,
1095
+ "rewards/accuracies": 0.9624999761581421,
1096
+ "rewards/chosen": 17.345399856567383,
1097
+ "rewards/margins": 5.823373794555664,
1098
+ "rewards/rejected": 11.522026062011719,
1099
+ "step": 620
1100
+ },
1101
+ {
1102
+ "epoch": 0.56,
1103
+ "grad_norm": 1.125027060508728,
1104
+ "learning_rate": 2.4146472245350804e-06,
1105
+ "logits/chosen": -0.02864963933825493,
1106
+ "logits/rejected": -0.03835710883140564,
1107
+ "logps/chosen": -2.494175434112549,
1108
+ "logps/rejected": -55.33067321777344,
1109
+ "loss": 0.3115,
1110
+ "rewards/accuracies": 0.925000011920929,
1111
+ "rewards/chosen": 17.262205123901367,
1112
+ "rewards/margins": 5.4287261962890625,
1113
+ "rewards/rejected": 11.833479881286621,
1114
+ "step": 630
1115
+ },
1116
+ {
1117
+ "epoch": 0.5688888888888889,
1118
+ "grad_norm": 0.03514016419649124,
1119
+ "learning_rate": 2.337137535291868e-06,
1120
+ "logits/chosen": -0.02757749892771244,
1121
+ "logits/rejected": -0.0376611053943634,
1122
+ "logps/chosen": -2.3163387775421143,
1123
+ "logps/rejected": -53.54579162597656,
1124
+ "loss": 0.1268,
1125
+ "rewards/accuracies": 0.9500000476837158,
1126
+ "rewards/chosen": 17.542863845825195,
1127
+ "rewards/margins": 5.799897193908691,
1128
+ "rewards/rejected": 11.742965698242188,
1129
+ "step": 640
1130
+ },
1131
+ {
1132
+ "epoch": 0.5777777777777777,
1133
+ "grad_norm": 34.20791244506836,
1134
+ "learning_rate": 2.259784782867782e-06,
1135
+ "logits/chosen": -0.02032250165939331,
1136
+ "logits/rejected": -0.031542714685201645,
1137
+ "logps/chosen": -1.7910137176513672,
1138
+ "logps/rejected": -62.980018615722656,
1139
+ "loss": 0.1831,
1140
+ "rewards/accuracies": 0.9500000476837158,
1141
+ "rewards/chosen": 16.982879638671875,
1142
+ "rewards/margins": 5.589078903198242,
1143
+ "rewards/rejected": 11.393800735473633,
1144
+ "step": 650
1145
+ },
1146
+ {
1147
+ "epoch": 0.5866666666666667,
1148
+ "grad_norm": 32.356956481933594,
1149
+ "learning_rate": 2.182663505585314e-06,
1150
+ "logits/chosen": -0.01711965538561344,
1151
+ "logits/rejected": -0.028038471937179565,
1152
+ "logps/chosen": -2.662904739379883,
1153
+ "logps/rejected": -63.326297760009766,
1154
+ "loss": 0.1678,
1155
+ "rewards/accuracies": 0.9624999761581421,
1156
+ "rewards/chosen": 16.997777938842773,
1157
+ "rewards/margins": 5.741157531738281,
1158
+ "rewards/rejected": 11.256620407104492,
1159
+ "step": 660
1160
+ },
1161
+ {
1162
+ "epoch": 0.5866666666666667,
1163
+ "eval_logits/chosen": -0.016018809750676155,
1164
+ "eval_logits/rejected": -0.027020033448934555,
1165
+ "eval_logps/chosen": -3.00822114944458,
1166
+ "eval_logps/rejected": -57.951629638671875,
1167
+ "eval_loss": 0.2050127536058426,
1168
+ "eval_rewards/accuracies": 0.9450000524520874,
1169
+ "eval_rewards/chosen": 17.202072143554688,
1170
+ "eval_rewards/margins": 5.636512756347656,
1171
+ "eval_rewards/rejected": 11.565557479858398,
1172
+ "eval_runtime": 361.5073,
1173
+ "eval_samples_per_second": 2.766,
1174
+ "eval_steps_per_second": 0.346,
1175
+ "step": 660
1176
+ },
1177
+ {
1178
+ "epoch": 0.5955555555555555,
1179
+ "grad_norm": 86.53874969482422,
1180
+ "learning_rate": 2.1058480187138863e-06,
1181
+ "logits/chosen": -0.012352555990219116,
1182
+ "logits/rejected": -0.023966707289218903,
1183
+ "logps/chosen": -2.129209041595459,
1184
+ "logps/rejected": -62.1393928527832,
1185
+ "loss": 0.1025,
1186
+ "rewards/accuracies": 0.9624999761581421,
1187
+ "rewards/chosen": 17.069538116455078,
1188
+ "rewards/margins": 5.678930759429932,
1189
+ "rewards/rejected": 11.390605926513672,
1190
+ "step": 670
1191
+ },
1192
+ {
1193
+ "epoch": 0.6044444444444445,
1194
+ "grad_norm": 2.892672300338745,
1195
+ "learning_rate": 2.0294123428584985e-06,
1196
+ "logits/chosen": -0.011562807485461235,
1197
+ "logits/rejected": -0.020859256386756897,
1198
+ "logps/chosen": -3.2486608028411865,
1199
+ "logps/rejected": -59.56721496582031,
1200
+ "loss": 0.1961,
1201
+ "rewards/accuracies": 0.9500000476837158,
1202
+ "rewards/chosen": 17.051376342773438,
1203
+ "rewards/margins": 5.515361785888672,
1204
+ "rewards/rejected": 11.536016464233398,
1205
+ "step": 680
1206
+ },
1207
+ {
1208
+ "epoch": 0.6133333333333333,
1209
+ "grad_norm": 30.26588249206543,
1210
+ "learning_rate": 1.953430132632311e-06,
1211
+ "logits/chosen": -0.011488726362586021,
1212
+ "logits/rejected": -0.021527227014303207,
1213
+ "logps/chosen": -1.794357180595398,
1214
+ "logps/rejected": -60.649505615234375,
1215
+ "loss": 0.2821,
1216
+ "rewards/accuracies": 0.9375,
1217
+ "rewards/chosen": 17.114843368530273,
1218
+ "rewards/margins": 5.612217903137207,
1219
+ "rewards/rejected": 11.50262451171875,
1220
+ "step": 690
1221
+ },
1222
+ {
1223
+ "epoch": 0.6222222222222222,
1224
+ "grad_norm": 0.030314341187477112,
1225
+ "learning_rate": 1.8779746056819104e-06,
1226
+ "logits/chosen": -0.014436552301049232,
1227
+ "logits/rejected": -0.026044374331831932,
1228
+ "logps/chosen": -3.1617178916931152,
1229
+ "logps/rejected": -60.94548797607422,
1230
+ "loss": 0.3141,
1231
+ "rewards/accuracies": 0.9375,
1232
+ "rewards/chosen": 17.08698272705078,
1233
+ "rewards/margins": 5.719264984130859,
1234
+ "rewards/rejected": 11.367716789245605,
1235
+ "step": 700
1236
+ },
1237
+ {
1238
+ "epoch": 0.6311111111111111,
1239
+ "grad_norm": 0.06711317598819733,
1240
+ "learning_rate": 1.8031184721336364e-06,
1241
+ "logits/chosen": -0.017312290146946907,
1242
+ "logits/rejected": -0.026554957032203674,
1243
+ "logps/chosen": -5.120705604553223,
1244
+ "logps/rejected": -54.33483123779297,
1245
+ "loss": 0.3827,
1246
+ "rewards/accuracies": 0.887499988079071,
1247
+ "rewards/chosen": 17.1247501373291,
1248
+ "rewards/margins": 5.327882766723633,
1249
+ "rewards/rejected": 11.796867370605469,
1250
+ "step": 710
1251
+ },
1252
+ {
1253
+ "epoch": 0.64,
1254
+ "grad_norm": 60.36637496948242,
1255
+ "learning_rate": 1.7289338645289711e-06,
1256
+ "logits/chosen": -0.01987219974398613,
1257
+ "logits/rejected": -0.02766304835677147,
1258
+ "logps/chosen": -2.1757986545562744,
1259
+ "logps/rejected": -54.02531433105469,
1260
+ "loss": 0.2272,
1261
+ "rewards/accuracies": 0.949999988079071,
1262
+ "rewards/chosen": 17.239776611328125,
1263
+ "rewards/margins": 5.250313758850098,
1264
+ "rewards/rejected": 11.989462852478027,
1265
+ "step": 720
1266
+ },
1267
+ {
1268
+ "epoch": 0.64,
1269
+ "eval_logits/chosen": -0.022905193269252777,
1270
+ "eval_logits/rejected": -0.0321992002427578,
1271
+ "eval_logps/chosen": -1.100506067276001,
1272
+ "eval_logps/rejected": -55.91169738769531,
1273
+ "eval_loss": 0.14018221199512482,
1274
+ "eval_rewards/accuracies": 0.9520000219345093,
1275
+ "eval_rewards/chosen": 17.392841339111328,
1276
+ "eval_rewards/margins": 5.623291492462158,
1277
+ "eval_rewards/rejected": 11.769551277160645,
1278
+ "eval_runtime": 361.416,
1279
+ "eval_samples_per_second": 2.767,
1280
+ "eval_steps_per_second": 0.346,
1281
+ "step": 720
1282
+ },
1283
+ {
1284
+ "epoch": 0.6488888888888888,
1285
+ "grad_norm": 0.029294608160853386,
1286
+ "learning_rate": 1.6554922683164875e-06,
1287
+ "logits/chosen": -0.016416028141975403,
1288
+ "logits/rejected": -0.025527067482471466,
1289
+ "logps/chosen": -0.906692385673523,
1290
+ "logps/rejected": -59.142173767089844,
1291
+ "loss": 0.1171,
1292
+ "rewards/accuracies": 0.9624999761581421,
1293
+ "rewards/chosen": 17.282169342041016,
1294
+ "rewards/margins": 5.6978583335876465,
1295
+ "rewards/rejected": 11.584310531616211,
1296
+ "step": 730
1297
+ },
1298
+ {
1299
+ "epoch": 0.6577777777777778,
1300
+ "grad_norm": 33.89070510864258,
1301
+ "learning_rate": 1.5828644529673592e-06,
1302
+ "logits/chosen": -0.016044551506638527,
1303
+ "logits/rejected": -0.025811903178691864,
1304
+ "logps/chosen": -1.1167538166046143,
1305
+ "logps/rejected": -59.84492492675781,
1306
+ "loss": 0.2001,
1307
+ "rewards/accuracies": 0.9624999761581421,
1308
+ "rewards/chosen": 17.266983032226562,
1309
+ "rewards/margins": 5.749438285827637,
1310
+ "rewards/rejected": 11.51754379272461,
1311
+ "step": 740
1312
+ },
1313
+ {
1314
+ "epoch": 0.6666666666666666,
1315
+ "grad_norm": 0.53726726770401,
1316
+ "learning_rate": 1.5111204037807844e-06,
1317
+ "logits/chosen": -0.012529855594038963,
1318
+ "logits/rejected": -0.023765765130519867,
1319
+ "logps/chosen": -0.3461765646934509,
1320
+ "logps/rejected": -55.841102600097656,
1321
+ "loss": 0.1692,
1322
+ "rewards/accuracies": 0.9375,
1323
+ "rewards/chosen": 17.61865997314453,
1324
+ "rewards/margins": 6.019055366516113,
1325
+ "rewards/rejected": 11.599604606628418,
1326
+ "step": 750
1327
+ },
1328
+ {
1329
+ "epoch": 0.6755555555555556,
1330
+ "grad_norm": 0.7173987627029419,
1331
+ "learning_rate": 1.4403292544450625e-06,
1332
+ "logits/chosen": -0.013380522839725018,
1333
+ "logits/rejected": -0.022107835859060287,
1334
+ "logps/chosen": -3.2759666442871094,
1335
+ "logps/rejected": -53.836822509765625,
1336
+ "loss": 0.2418,
1337
+ "rewards/accuracies": 0.949999988079071,
1338
+ "rewards/chosen": 17.383333206176758,
1339
+ "rewards/margins": 5.590358734130859,
1340
+ "rewards/rejected": 11.792974472045898,
1341
+ "step": 760
1342
+ },
1343
+ {
1344
+ "epoch": 0.6844444444444444,
1345
+ "grad_norm": 2.744900941848755,
1346
+ "learning_rate": 1.3705592204192853e-06,
1347
+ "logits/chosen": -0.011318420059978962,
1348
+ "logits/rejected": -0.021840626373887062,
1349
+ "logps/chosen": -2.592001438140869,
1350
+ "logps/rejected": -57.06926345825195,
1351
+ "loss": 0.1749,
1352
+ "rewards/accuracies": 0.9500000476837158,
1353
+ "rewards/chosen": 17.286128997802734,
1354
+ "rewards/margins": 5.662715911865234,
1355
+ "rewards/rejected": 11.623414993286133,
1356
+ "step": 770
1357
+ },
1358
+ {
1359
+ "epoch": 0.6933333333333334,
1360
+ "grad_norm": 70.76551818847656,
1361
+ "learning_rate": 1.301877533199859e-06,
1362
+ "logits/chosen": -0.013944407925009727,
1363
+ "logits/rejected": -0.025667501613497734,
1364
+ "logps/chosen": -2.1070234775543213,
1365
+ "logps/rejected": -57.72039794921875,
1366
+ "loss": 0.1915,
1367
+ "rewards/accuracies": 0.925000011920929,
1368
+ "rewards/chosen": 17.28545379638672,
1369
+ "rewards/margins": 5.708344459533691,
1370
+ "rewards/rejected": 11.577108383178711,
1371
+ "step": 780
1372
+ },
1373
+ {
1374
+ "epoch": 0.6933333333333334,
1375
+ "eval_logits/chosen": -0.01664295792579651,
1376
+ "eval_logits/rejected": -0.026626665145158768,
1377
+ "eval_logps/chosen": -1.082255244255066,
1378
+ "eval_logps/rejected": -55.95073699951172,
1379
+ "eval_loss": 0.24412688612937927,
1380
+ "eval_rewards/accuracies": 0.9320000410079956,
1381
+ "eval_rewards/chosen": 17.394668579101562,
1382
+ "eval_rewards/margins": 5.629020690917969,
1383
+ "eval_rewards/rejected": 11.765647888183594,
1384
+ "eval_runtime": 361.4602,
1385
+ "eval_samples_per_second": 2.767,
1386
+ "eval_steps_per_second": 0.346,
1387
+ "step": 780
1388
+ },
1389
+ {
1390
+ "epoch": 0.7022222222222222,
1391
+ "grad_norm": 0.41359376907348633,
1392
+ "learning_rate": 1.2343503755351729e-06,
1393
+ "logits/chosen": -0.012772129848599434,
1394
+ "logits/rejected": -0.023186586797237396,
1395
+ "logps/chosen": -0.8030359148979187,
1396
+ "logps/rejected": -57.84947967529297,
1397
+ "loss": 0.2143,
1398
+ "rewards/accuracies": 0.9375,
1399
+ "rewards/chosen": 17.32242202758789,
1400
+ "rewards/margins": 5.645486831665039,
1401
+ "rewards/rejected": 11.676933288574219,
1402
+ "step": 790
1403
+ },
1404
+ {
1405
+ "epoch": 0.7111111111111111,
1406
+ "grad_norm": 0.41748157143592834,
1407
+ "learning_rate": 1.168042817650881e-06,
1408
+ "logits/chosen": -0.012502101249992847,
1409
+ "logits/rejected": -0.023272844031453133,
1410
+ "logps/chosen": -1.0377256870269775,
1411
+ "logps/rejected": -57.05615997314453,
1412
+ "loss": 0.0792,
1413
+ "rewards/accuracies": 0.9750000238418579,
1414
+ "rewards/chosen": 17.60501480102539,
1415
+ "rewards/margins": 6.159370422363281,
1416
+ "rewards/rejected": 11.445646286010742,
1417
+ "step": 800
1418
+ },
1419
+ {
1420
+ "epoch": 0.72,
1421
+ "grad_norm": 0.039210401475429535,
1422
+ "learning_rate": 1.1030187545472012e-06,
1423
+ "logits/chosen": -0.008177272044122219,
1424
+ "logits/rejected": -0.01832464337348938,
1425
+ "logps/chosen": -3.8701748847961426,
1426
+ "logps/rejected": -57.661476135253906,
1427
+ "loss": 0.3177,
1428
+ "rewards/accuracies": 0.925000011920929,
1429
+ "rewards/chosen": 17.14336395263672,
1430
+ "rewards/margins": 5.583393573760986,
1431
+ "rewards/rejected": 11.55997085571289,
1432
+ "step": 810
1433
+ },
1434
+ {
1435
+ "epoch": 0.7288888888888889,
1436
+ "grad_norm": 1.6096951961517334,
1437
+ "learning_rate": 1.0393408444287048e-06,
1438
+ "logits/chosen": -0.006832236424088478,
1439
+ "logits/rejected": -0.01682097464799881,
1440
+ "logps/chosen": -2.1741790771484375,
1441
+ "logps/rejected": -57.273414611816406,
1442
+ "loss": 0.2128,
1443
+ "rewards/accuracies": 0.949999988079071,
1444
+ "rewards/chosen": 17.426712036132812,
1445
+ "rewards/margins": 5.911205291748047,
1446
+ "rewards/rejected": 11.515506744384766,
1447
+ "step": 820
1448
+ },
1449
+ {
1450
+ "epoch": 0.7377777777777778,
1451
+ "grad_norm": 3.7820959091186523,
1452
+ "learning_rate": 9.770704483258782e-07,
1453
+ "logits/chosen": -0.009998206980526447,
1454
+ "logits/rejected": -0.0204261876642704,
1455
+ "logps/chosen": -1.9802953004837036,
1456
+ "logps/rejected": -56.901512145996094,
1457
+ "loss": 0.1296,
1458
+ "rewards/accuracies": 0.9750000238418579,
1459
+ "rewards/chosen": 17.56071662902832,
1460
+ "rewards/margins": 6.169583320617676,
1461
+ "rewards/rejected": 11.391134262084961,
1462
+ "step": 830
1463
+ },
1464
+ {
1465
+ "epoch": 0.7466666666666667,
1466
+ "grad_norm": 1.126626968383789,
1467
+ "learning_rate": 9.162675709666865e-07,
1468
+ "logits/chosen": -0.00826224498450756,
1469
+ "logits/rejected": -0.018977787345647812,
1470
+ "logps/chosen": -1.4256607294082642,
1471
+ "logps/rejected": -61.65986633300781,
1472
+ "loss": 0.0635,
1473
+ "rewards/accuracies": 0.987500011920929,
1474
+ "rewards/chosen": 17.324975967407227,
1475
+ "rewards/margins": 6.09440803527832,
1476
+ "rewards/rejected": 11.230567932128906,
1477
+ "step": 840
1478
+ },
1479
+ {
1480
+ "epoch": 0.7466666666666667,
1481
+ "eval_logits/chosen": -0.01111944392323494,
1482
+ "eval_logits/rejected": -0.021697774529457092,
1483
+ "eval_logps/chosen": -1.2168633937835693,
1484
+ "eval_logps/rejected": -58.2642822265625,
1485
+ "eval_loss": 0.1689341962337494,
1486
+ "eval_rewards/accuracies": 0.9450000524520874,
1487
+ "eval_rewards/chosen": 17.381206512451172,
1488
+ "eval_rewards/margins": 5.846914291381836,
1489
+ "eval_rewards/rejected": 11.534292221069336,
1490
+ "eval_runtime": 361.6192,
1491
+ "eval_samples_per_second": 2.765,
1492
+ "eval_steps_per_second": 0.346,
1493
+ "step": 840
1494
+ },
1495
+ {
1496
+ "epoch": 0.7555555555555555,
1497
+ "grad_norm": 5.250723838806152,
1498
+ "learning_rate": 8.569908029550686e-07,
1499
+ "logits/chosen": -0.006854387000203133,
1500
+ "logits/rejected": -0.018336206674575806,
1501
+ "logps/chosen": -0.6238930821418762,
1502
+ "logps/rejected": -60.925689697265625,
1503
+ "loss": 0.1157,
1504
+ "rewards/accuracies": 0.9750000238418579,
1505
+ "rewards/chosen": 17.443281173706055,
1506
+ "rewards/margins": 6.164813995361328,
1507
+ "rewards/rejected": 11.278467178344727,
1508
+ "step": 850
1509
+ },
1510
+ {
1511
+ "epoch": 0.7644444444444445,
1512
+ "grad_norm": 3.1401162147521973,
1513
+ "learning_rate": 7.992972643121227e-07,
1514
+ "logits/chosen": -0.0037835021503269672,
1515
+ "logits/rejected": -0.013135241344571114,
1516
+ "logps/chosen": -0.8492221832275391,
1517
+ "logps/rejected": -55.516075134277344,
1518
+ "loss": 0.2252,
1519
+ "rewards/accuracies": 0.9375,
1520
+ "rewards/chosen": 17.488588333129883,
1521
+ "rewards/margins": 5.736725807189941,
1522
+ "rewards/rejected": 11.751862525939941,
1523
+ "step": 860
1524
+ },
1525
+ {
1526
+ "epoch": 0.7733333333333333,
1527
+ "grad_norm": 55.528812408447266,
1528
+ "learning_rate": 7.432425494343509e-07,
1529
+ "logits/chosen": -0.0033687639515846968,
1530
+ "logits/rejected": -0.013152632862329483,
1531
+ "logps/chosen": -1.3188884258270264,
1532
+ "logps/rejected": -57.9510498046875,
1533
+ "loss": 0.1398,
1534
+ "rewards/accuracies": 0.9624999761581421,
1535
+ "rewards/chosen": 17.412578582763672,
1536
+ "rewards/margins": 5.868515968322754,
1537
+ "rewards/rejected": 11.544061660766602,
1538
+ "step": 870
1539
+ },
1540
+ {
1541
+ "epoch": 0.7822222222222223,
1542
+ "grad_norm": 0.039824869483709335,
1543
+ "learning_rate": 6.888806735220396e-07,
1544
+ "logits/chosen": -0.0010406378423795104,
1545
+ "logits/rejected": -0.012095071375370026,
1546
+ "logps/chosen": -2.0619027614593506,
1547
+ "logps/rejected": -59.65806579589844,
1548
+ "loss": 0.2966,
1549
+ "rewards/accuracies": 0.9375,
1550
+ "rewards/chosen": 17.192535400390625,
1551
+ "rewards/margins": 5.7006731033325195,
1552
+ "rewards/rejected": 11.491861343383789,
1553
+ "step": 880
1554
+ },
1555
+ {
1556
+ "epoch": 0.7911111111111111,
1557
+ "grad_norm": 0.7536466717720032,
1558
+ "learning_rate": 6.362640205293583e-07,
1559
+ "logits/chosen": -0.0016857212176546454,
1560
+ "logits/rejected": -0.010936147533357143,
1561
+ "logps/chosen": -2.1478958129882812,
1562
+ "logps/rejected": -58.20386505126953,
1563
+ "loss": 0.1641,
1564
+ "rewards/accuracies": 0.9375,
1565
+ "rewards/chosen": 17.243247985839844,
1566
+ "rewards/margins": 5.678771018981934,
1567
+ "rewards/rejected": 11.564477920532227,
1568
+ "step": 890
1569
+ },
1570
+ {
1571
+ "epoch": 0.8,
1572
+ "grad_norm": 0.3857377767562866,
1573
+ "learning_rate": 5.854432926863684e-07,
1574
+ "logits/chosen": 0.00038508616853505373,
1575
+ "logits/rejected": -0.011034643277525902,
1576
+ "logps/chosen": -1.606274127960205,
1577
+ "logps/rejected": -62.58662414550781,
1578
+ "loss": 0.1703,
1579
+ "rewards/accuracies": 0.9624999761581421,
1580
+ "rewards/chosen": 17.211261749267578,
1581
+ "rewards/margins": 5.966868877410889,
1582
+ "rewards/rejected": 11.244392395019531,
1583
+ "step": 900
1584
+ },
1585
+ {
1586
+ "epoch": 0.8,
1587
+ "eval_logits/chosen": 0.00021816430671606213,
1588
+ "eval_logits/rejected": -0.010477552190423012,
1589
+ "eval_logps/chosen": -1.7576563358306885,
1590
+ "eval_logps/rejected": -59.79063415527344,
1591
+ "eval_loss": 0.13995186984539032,
1592
+ "eval_rewards/accuracies": 0.9610000848770142,
1593
+ "eval_rewards/chosen": 17.327129364013672,
1594
+ "eval_rewards/margins": 5.945469856262207,
1595
+ "eval_rewards/rejected": 11.381658554077148,
1596
+ "eval_runtime": 361.4592,
1597
+ "eval_samples_per_second": 2.767,
1598
+ "eval_steps_per_second": 0.346,
1599
+ "step": 900
1600
+ },
1601
+ {
1602
+ "epoch": 0.8088888888888889,
1603
+ "grad_norm": 0.13143697381019592,
1604
+ "learning_rate": 5.364674616415547e-07,
1605
+ "logits/chosen": 0.0005570838693529367,
1606
+ "logits/rejected": -0.011198626831173897,
1607
+ "logps/chosen": -0.12791283428668976,
1608
+ "logps/rejected": -65.7052993774414,
1609
+ "loss": 0.0507,
1610
+ "rewards/accuracies": 0.9750000238418579,
1611
+ "rewards/chosen": 17.224443435668945,
1612
+ "rewards/margins": 6.163690567016602,
1613
+ "rewards/rejected": 11.060752868652344,
1614
+ "step": 910
1615
+ },
1616
+ {
1617
+ "epoch": 0.8177777777777778,
1618
+ "grad_norm": 102.9654541015625,
1619
+ "learning_rate": 4.893837212719859e-07,
1620
+ "logits/chosen": -0.0008557128603570163,
1621
+ "logits/rejected": -0.01163212489336729,
1622
+ "logps/chosen": -1.3292646408081055,
1623
+ "logps/rejected": -61.644893646240234,
1624
+ "loss": 0.0881,
1625
+ "rewards/accuracies": 0.9500000476837158,
1626
+ "rewards/chosen": 17.276538848876953,
1627
+ "rewards/margins": 5.96080207824707,
1628
+ "rewards/rejected": 11.3157377243042,
1629
+ "step": 920
1630
+ },
1631
+ {
1632
+ "epoch": 0.8266666666666667,
1633
+ "grad_norm": 146.04498291015625,
1634
+ "learning_rate": 4.442374422065493e-07,
1635
+ "logits/chosen": 0.002922601066529751,
1636
+ "logits/rejected": -0.007130052894353867,
1637
+ "logps/chosen": -1.6069023609161377,
1638
+ "logps/rejected": -61.52588653564453,
1639
+ "loss": 0.1373,
1640
+ "rewards/accuracies": 0.949999988079071,
1641
+ "rewards/chosen": 17.184974670410156,
1642
+ "rewards/margins": 5.7995734214782715,
1643
+ "rewards/rejected": 11.38540267944336,
1644
+ "step": 930
1645
+ },
1646
+ {
1647
+ "epoch": 0.8355555555555556,
1648
+ "grad_norm": 23.539485931396484,
1649
+ "learning_rate": 4.0107212810610974e-07,
1650
+ "logits/chosen": 0.0018056132830679417,
1651
+ "logits/rejected": -0.007847340777516365,
1652
+ "logps/chosen": -3.637047290802002,
1653
+ "logps/rejected": -61.21245574951172,
1654
+ "loss": 0.2763,
1655
+ "rewards/accuracies": 0.925000011920929,
1656
+ "rewards/chosen": 16.960758209228516,
1657
+ "rewards/margins": 5.551811695098877,
1658
+ "rewards/rejected": 11.40894603729248,
1659
+ "step": 940
1660
+ },
1661
+ {
1662
+ "epoch": 0.8444444444444444,
1663
+ "grad_norm": 31.52926254272461,
1664
+ "learning_rate": 3.599293737426932e-07,
1665
+ "logits/chosen": 0.0032081177923828363,
1666
+ "logits/rejected": -0.007756482809782028,
1667
+ "logps/chosen": -1.172515869140625,
1668
+ "logps/rejected": -66.5853271484375,
1669
+ "loss": 0.1758,
1670
+ "rewards/accuracies": 0.949999988079071,
1671
+ "rewards/chosen": 16.992876052856445,
1672
+ "rewards/margins": 5.886469841003418,
1673
+ "rewards/rejected": 11.106407165527344,
1674
+ "step": 950
1675
+ },
1676
+ {
1677
+ "epoch": 0.8533333333333334,
1678
+ "grad_norm": 0.07474468648433685,
1679
+ "learning_rate": 3.208488249181216e-07,
1680
+ "logits/chosen": 0.0022508346009999514,
1681
+ "logits/rejected": -0.009156409651041031,
1682
+ "logps/chosen": -0.7930470108985901,
1683
+ "logps/rejected": -60.068790435791016,
1684
+ "loss": 0.1138,
1685
+ "rewards/accuracies": 0.9624999761581421,
1686
+ "rewards/chosen": 17.401771545410156,
1687
+ "rewards/margins": 6.0026960372924805,
1688
+ "rewards/rejected": 11.399076461791992,
1689
+ "step": 960
1690
+ },
1691
+ {
1692
+ "epoch": 0.8533333333333334,
1693
+ "eval_logits/chosen": 0.001469604205340147,
1694
+ "eval_logits/rejected": -0.009397665038704872,
1695
+ "eval_logps/chosen": -1.8795456886291504,
1696
+ "eval_logps/rejected": -60.17564010620117,
1697
+ "eval_loss": 0.1441129744052887,
1698
+ "eval_rewards/accuracies": 0.9630000591278076,
1699
+ "eval_rewards/chosen": 17.314937591552734,
1700
+ "eval_rewards/margins": 5.9717817306518555,
1701
+ "eval_rewards/rejected": 11.343156814575195,
1702
+ "eval_runtime": 361.5344,
1703
+ "eval_samples_per_second": 2.766,
1704
+ "eval_steps_per_second": 0.346,
1705
+ "step": 960
1706
+ },
1707
+ {
1708
+ "epoch": 0.8622222222222222,
1709
+ "grad_norm": 68.90747833251953,
1710
+ "learning_rate": 2.838681402606952e-07,
1711
+ "logits/chosen": 0.004552370868623257,
1712
+ "logits/rejected": -0.005488495342433453,
1713
+ "logps/chosen": -3.7298974990844727,
1714
+ "logps/rejected": -64.72488403320312,
1715
+ "loss": 0.2425,
1716
+ "rewards/accuracies": 0.925000011920929,
1717
+ "rewards/chosen": 16.832754135131836,
1718
+ "rewards/margins": 5.644216537475586,
1719
+ "rewards/rejected": 11.18853759765625,
1720
+ "step": 970
1721
+ },
1722
+ {
1723
+ "epoch": 0.8711111111111111,
1724
+ "grad_norm": 0.028206102550029755,
1725
+ "learning_rate": 2.490229549367443e-07,
1726
+ "logits/chosen": 0.0025807656347751617,
1727
+ "logits/rejected": -0.008657123893499374,
1728
+ "logps/chosen": -1.1825838088989258,
1729
+ "logps/rejected": -63.57493591308594,
1730
+ "loss": 0.0609,
1731
+ "rewards/accuracies": 0.9750000238418579,
1732
+ "rewards/chosen": 17.227130889892578,
1733
+ "rewards/margins": 6.053717136383057,
1734
+ "rewards/rejected": 11.173412322998047,
1735
+ "step": 980
1736
+ },
1737
+ {
1738
+ "epoch": 0.88,
1739
+ "grad_norm": 0.39742231369018555,
1740
+ "learning_rate": 2.1634684631203412e-07,
1741
+ "logits/chosen": 0.0048486413434147835,
1742
+ "logits/rejected": -0.006056814920157194,
1743
+ "logps/chosen": -2.919680595397949,
1744
+ "logps/rejected": -57.8403205871582,
1745
+ "loss": 0.3464,
1746
+ "rewards/accuracies": 0.8999999761581421,
1747
+ "rewards/chosen": 17.175275802612305,
1748
+ "rewards/margins": 5.559727668762207,
1749
+ "rewards/rejected": 11.615548133850098,
1750
+ "step": 990
1751
+ },
1752
+ {
1753
+ "epoch": 0.8888888888888888,
1754
+ "grad_norm": 30.22509002685547,
1755
+ "learning_rate": 1.8587130159608196e-07,
1756
+ "logits/chosen": 0.0050649940967559814,
1757
+ "logits/rejected": -0.0071399761363863945,
1758
+ "logps/chosen": -0.16131475567817688,
1759
+ "logps/rejected": -66.25190734863281,
1760
+ "loss": 0.0063,
1761
+ "rewards/accuracies": 1.0,
1762
+ "rewards/chosen": 17.313472747802734,
1763
+ "rewards/margins": 6.412895679473877,
1764
+ "rewards/rejected": 10.9005765914917,
1765
+ "step": 1000
1766
+ }
1767
+ ],
1768
+ "logging_steps": 10,
1769
+ "max_steps": 1125,
1770
+ "num_input_tokens_seen": 0,
1771
+ "num_train_epochs": 1,
1772
+ "save_steps": 500,
1773
+ "stateful_callbacks": {
1774
+ "TrainerControl": {
1775
+ "args": {
1776
+ "should_epoch_stop": false,
1777
+ "should_evaluate": false,
1778
+ "should_log": false,
1779
+ "should_save": true,
1780
+ "should_training_stop": false
1781
+ },
1782
+ "attributes": {}
1783
+ }
1784
+ },
1785
+ "total_flos": 1.2745297197268992e+18,
1786
+ "train_batch_size": 4,
1787
+ "trial_name": null,
1788
+ "trial_params": null
1789
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5286bf380083ef64133f25e161ddfb8549ff44790e432f478753f221ef89e695
3
+ size 5368
checkpoint-1125/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
checkpoint-1125/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "gate_proj",
25
+ "k_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "down_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
checkpoint-1125/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6404a5e1c60aa26a99a4ccbb402f485483d5eea90c08441f0448e29c51e2b1d0
3
+ size 114106856
checkpoint-1125/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:679b631e5cc31300cab94ec2968b5eafa50b74a27dc17dffbac7d227a1ae878e
3
+ size 228536930
checkpoint-1125/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55f6ef5fd81f87dc8a69f1b9a751ba37cc49c37318322e45ba4733ff23a92208
3
+ size 14512
checkpoint-1125/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f510db96e40d7f66609c96cf485c13417fc4eaf253603d2b6591466c3fb5f63a
3
+ size 14512
checkpoint-1125/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb6eb7801fbbb5055fabd75421a32f0209bef58fe235404c9e167738db177888
3
+ size 1064
checkpoint-1125/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-1125/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
3
+ size 17078292
checkpoint-1125/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1125/trainer_state.json ADDED
@@ -0,0 +1,2001 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 60,
6
+ "global_step": 1125,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.008888888888888889,
13
+ "grad_norm": 8.760091781616211,
14
+ "learning_rate": 4.4247787610619474e-07,
15
+ "logits/chosen": -0.8248252868652344,
16
+ "logits/rejected": -0.8263720273971558,
17
+ "logps/chosen": -0.36086463928222656,
18
+ "logps/rejected": -5.696224689483643,
19
+ "loss": 1.1038,
20
+ "rewards/accuracies": 0.5125000476837158,
21
+ "rewards/chosen": 17.43745994567871,
22
+ "rewards/margins": 0.5984855890274048,
23
+ "rewards/rejected": 16.838973999023438,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.017777777777777778,
28
+ "grad_norm": 8.855981826782227,
29
+ "learning_rate": 8.849557522123895e-07,
30
+ "logits/chosen": -0.8169006109237671,
31
+ "logits/rejected": -0.819770872592926,
32
+ "logps/chosen": -0.12464660406112671,
33
+ "logps/rejected": -7.139842987060547,
34
+ "loss": 1.1887,
35
+ "rewards/accuracies": 0.4000000059604645,
36
+ "rewards/chosen": 17.17649269104004,
37
+ "rewards/margins": 0.19107049703598022,
38
+ "rewards/rejected": 16.98542022705078,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.02666666666666667,
43
+ "grad_norm": 16.764184951782227,
44
+ "learning_rate": 1.3274336283185843e-06,
45
+ "logits/chosen": -0.8003113865852356,
46
+ "logits/rejected": -0.8030117750167847,
47
+ "logps/chosen": -0.34651467204093933,
48
+ "logps/rejected": -6.967917442321777,
49
+ "loss": 1.0563,
50
+ "rewards/accuracies": 0.44999998807907104,
51
+ "rewards/chosen": 17.280975341796875,
52
+ "rewards/margins": 0.40005987882614136,
53
+ "rewards/rejected": 16.88091468811035,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.035555555555555556,
58
+ "grad_norm": 8.33682918548584,
59
+ "learning_rate": 1.769911504424779e-06,
60
+ "logits/chosen": -0.7695047855377197,
61
+ "logits/rejected": -0.7739207148551941,
62
+ "logps/chosen": -1.5993006229400635,
63
+ "logps/rejected": -8.504932403564453,
64
+ "loss": 0.7596,
65
+ "rewards/accuracies": 0.5,
66
+ "rewards/chosen": 17.283912658691406,
67
+ "rewards/margins": 0.6976072192192078,
68
+ "rewards/rejected": 16.5863037109375,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.044444444444444446,
73
+ "grad_norm": 4.494723320007324,
74
+ "learning_rate": 2.212389380530974e-06,
75
+ "logits/chosen": -0.7154140472412109,
76
+ "logits/rejected": -0.7225576043128967,
77
+ "logps/chosen": -3.112199068069458,
78
+ "logps/rejected": -12.212080001831055,
79
+ "loss": 0.6083,
80
+ "rewards/accuracies": 0.4625000059604645,
81
+ "rewards/chosen": 17.03064727783203,
82
+ "rewards/margins": 0.7148451805114746,
83
+ "rewards/rejected": 16.3158016204834,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.05333333333333334,
88
+ "grad_norm": 5.110287666320801,
89
+ "learning_rate": 2.6548672566371687e-06,
90
+ "logits/chosen": -0.6322453022003174,
91
+ "logits/rejected": -0.6387485265731812,
92
+ "logps/chosen": -5.650620460510254,
93
+ "logps/rejected": -12.759811401367188,
94
+ "loss": 0.3835,
95
+ "rewards/accuracies": 0.9125000238418579,
96
+ "rewards/chosen": 17.101289749145508,
97
+ "rewards/margins": 1.1824612617492676,
98
+ "rewards/rejected": 15.918828964233398,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.05333333333333334,
103
+ "eval_logits/chosen": -0.5826543569564819,
104
+ "eval_logits/rejected": -0.5914276838302612,
105
+ "eval_logps/chosen": -3.5471787452697754,
106
+ "eval_logps/rejected": -16.51181983947754,
107
+ "eval_loss": 0.3286525011062622,
108
+ "eval_rewards/accuracies": 0.9280000925064087,
109
+ "eval_rewards/chosen": 17.148174285888672,
110
+ "eval_rewards/margins": 1.4386365413665771,
111
+ "eval_rewards/rejected": 15.709539413452148,
112
+ "eval_runtime": 372.0227,
113
+ "eval_samples_per_second": 2.688,
114
+ "eval_steps_per_second": 0.336,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.06222222222222222,
119
+ "grad_norm": 5.098133563995361,
120
+ "learning_rate": 3.097345132743363e-06,
121
+ "logits/chosen": -0.5378152132034302,
122
+ "logits/rejected": -0.5494933724403381,
123
+ "logps/chosen": -1.5099802017211914,
124
+ "logps/rejected": -21.206321716308594,
125
+ "loss": 0.2931,
126
+ "rewards/accuracies": 0.9375,
127
+ "rewards/chosen": 17.083791732788086,
128
+ "rewards/margins": 1.5844331979751587,
129
+ "rewards/rejected": 15.499359130859375,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.07111111111111111,
134
+ "grad_norm": 29.787437438964844,
135
+ "learning_rate": 3.539823008849558e-06,
136
+ "logits/chosen": -0.443774938583374,
137
+ "logits/rejected": -0.45571577548980713,
138
+ "logps/chosen": -1.5804342031478882,
139
+ "logps/rejected": -22.606929779052734,
140
+ "loss": 0.202,
141
+ "rewards/accuracies": 0.9750000238418579,
142
+ "rewards/chosen": 17.302125930786133,
143
+ "rewards/margins": 2.174014091491699,
144
+ "rewards/rejected": 15.128110885620117,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 0.08,
149
+ "grad_norm": 23.14398193359375,
150
+ "learning_rate": 3.982300884955752e-06,
151
+ "logits/chosen": -0.3626072406768799,
152
+ "logits/rejected": -0.3787815570831299,
153
+ "logps/chosen": -2.203828811645508,
154
+ "logps/rejected": -29.433551788330078,
155
+ "loss": 0.2123,
156
+ "rewards/accuracies": 0.925000011920929,
157
+ "rewards/chosen": 17.00284194946289,
158
+ "rewards/margins": 2.320391893386841,
159
+ "rewards/rejected": 14.682450294494629,
160
+ "step": 90
161
+ },
162
+ {
163
+ "epoch": 0.08888888888888889,
164
+ "grad_norm": 29.672739028930664,
165
+ "learning_rate": 4.424778761061948e-06,
166
+ "logits/chosen": -0.3035663962364197,
167
+ "logits/rejected": -0.31762221455574036,
168
+ "logps/chosen": -3.433589458465576,
169
+ "logps/rejected": -29.9322509765625,
170
+ "loss": 0.2592,
171
+ "rewards/accuracies": 0.9375,
172
+ "rewards/chosen": 16.929956436157227,
173
+ "rewards/margins": 2.31272029876709,
174
+ "rewards/rejected": 14.617237091064453,
175
+ "step": 100
176
+ },
177
+ {
178
+ "epoch": 0.09777777777777778,
179
+ "grad_norm": 1.873722791671753,
180
+ "learning_rate": 4.867256637168142e-06,
181
+ "logits/chosen": -0.2679600715637207,
182
+ "logits/rejected": -0.2826440930366516,
183
+ "logps/chosen": -0.9653514623641968,
184
+ "logps/rejected": -30.235322952270508,
185
+ "loss": 0.1336,
186
+ "rewards/accuracies": 0.949999988079071,
187
+ "rewards/chosen": 17.462385177612305,
188
+ "rewards/margins": 3.1994175910949707,
189
+ "rewards/rejected": 14.26296615600586,
190
+ "step": 110
191
+ },
192
+ {
193
+ "epoch": 0.10666666666666667,
194
+ "grad_norm": 1.6913721561431885,
195
+ "learning_rate": 4.999409761242696e-06,
196
+ "logits/chosen": -0.22222033143043518,
197
+ "logits/rejected": -0.23720571398735046,
198
+ "logps/chosen": -4.4953508377075195,
199
+ "logps/rejected": -34.074745178222656,
200
+ "loss": 0.2552,
201
+ "rewards/accuracies": 0.8999999761581421,
202
+ "rewards/chosen": 17.04866600036621,
203
+ "rewards/margins": 3.1014418601989746,
204
+ "rewards/rejected": 13.947224617004395,
205
+ "step": 120
206
+ },
207
+ {
208
+ "epoch": 0.10666666666666667,
209
+ "eval_logits/chosen": -0.206527978181839,
210
+ "eval_logits/rejected": -0.22178640961647034,
211
+ "eval_logps/chosen": -3.69442081451416,
212
+ "eval_logps/rejected": -36.072166442871094,
213
+ "eval_loss": 0.18996010720729828,
214
+ "eval_rewards/accuracies": 0.9320000410079956,
215
+ "eval_rewards/chosen": 17.133451461791992,
216
+ "eval_rewards/margins": 3.379946708679199,
217
+ "eval_rewards/rejected": 13.753504753112793,
218
+ "eval_runtime": 361.5279,
219
+ "eval_samples_per_second": 2.766,
220
+ "eval_steps_per_second": 0.346,
221
+ "step": 120
222
+ },
223
+ {
224
+ "epoch": 0.11555555555555555,
225
+ "grad_norm": 61.80262756347656,
226
+ "learning_rate": 4.996519466816778e-06,
227
+ "logits/chosen": -0.18473535776138306,
228
+ "logits/rejected": -0.1988501250743866,
229
+ "logps/chosen": -3.7009687423706055,
230
+ "logps/rejected": -39.289939880371094,
231
+ "loss": 0.1394,
232
+ "rewards/accuracies": 0.9624999761581421,
233
+ "rewards/chosen": 17.106964111328125,
234
+ "rewards/margins": 3.633338212966919,
235
+ "rewards/rejected": 13.473625183105469,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 0.12444444444444444,
240
+ "grad_norm": 1.6732702255249023,
241
+ "learning_rate": 4.9912234871722805e-06,
242
+ "logits/chosen": -0.16134041547775269,
243
+ "logits/rejected": -0.17547868192195892,
244
+ "logps/chosen": -3.0637736320495605,
245
+ "logps/rejected": -40.07548522949219,
246
+ "loss": 0.1408,
247
+ "rewards/accuracies": 0.9750000238418579,
248
+ "rewards/chosen": 17.392223358154297,
249
+ "rewards/margins": 4.242353439331055,
250
+ "rewards/rejected": 13.149867057800293,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.13333333333333333,
255
+ "grad_norm": 0.346453994512558,
256
+ "learning_rate": 4.98352692559805e-06,
257
+ "logits/chosen": -0.13797929883003235,
258
+ "logits/rejected": -0.15283086895942688,
259
+ "logps/chosen": -5.14492130279541,
260
+ "logps/rejected": -47.97212219238281,
261
+ "loss": 0.2153,
262
+ "rewards/accuracies": 0.9375,
263
+ "rewards/chosen": 16.896778106689453,
264
+ "rewards/margins": 4.227695465087891,
265
+ "rewards/rejected": 12.669081687927246,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.14222222222222222,
270
+ "grad_norm": 0.21871662139892578,
271
+ "learning_rate": 4.973437198621237e-06,
272
+ "logits/chosen": -0.12396670132875443,
273
+ "logits/rejected": -0.13780555129051208,
274
+ "logps/chosen": -6.108860015869141,
275
+ "logps/rejected": -54.90739440917969,
276
+ "loss": 0.0388,
277
+ "rewards/accuracies": 0.9750000238418579,
278
+ "rewards/chosen": 16.75935935974121,
279
+ "rewards/margins": 4.755282878875732,
280
+ "rewards/rejected": 12.004077911376953,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 0.1511111111111111,
285
+ "grad_norm": 235.12429809570312,
286
+ "learning_rate": 4.960964028860621e-06,
287
+ "logits/chosen": -0.1140839159488678,
288
+ "logits/rejected": -0.1263057291507721,
289
+ "logps/chosen": -12.605452537536621,
290
+ "logps/rejected": -53.81230926513672,
291
+ "loss": 0.4651,
292
+ "rewards/accuracies": 0.875,
293
+ "rewards/chosen": 16.101238250732422,
294
+ "rewards/margins": 3.9864249229431152,
295
+ "rewards/rejected": 12.114812850952148,
296
+ "step": 170
297
+ },
298
+ {
299
+ "epoch": 0.16,
300
+ "grad_norm": 190.97048950195312,
301
+ "learning_rate": 4.946119435657738e-06,
302
+ "logits/chosen": -0.10746976733207703,
303
+ "logits/rejected": -0.11878640949726105,
304
+ "logps/chosen": -8.5105562210083,
305
+ "logps/rejected": -51.314781188964844,
306
+ "loss": 0.2362,
307
+ "rewards/accuracies": 0.925000011920929,
308
+ "rewards/chosen": 16.719980239868164,
309
+ "rewards/margins": 4.549674034118652,
310
+ "rewards/rejected": 12.170306205749512,
311
+ "step": 180
312
+ },
313
+ {
314
+ "epoch": 0.16,
315
+ "eval_logits/chosen": -0.10870806127786636,
316
+ "eval_logits/rejected": -0.12223993986845016,
317
+ "eval_logps/chosen": -4.414996147155762,
318
+ "eval_logps/rejected": -53.885032653808594,
319
+ "eval_loss": 0.20236633718013763,
320
+ "eval_rewards/accuracies": 0.9510000944137573,
321
+ "eval_rewards/chosen": 17.06139373779297,
322
+ "eval_rewards/margins": 5.089176177978516,
323
+ "eval_rewards/rejected": 11.97221851348877,
324
+ "eval_runtime": 361.4355,
325
+ "eval_samples_per_second": 2.767,
326
+ "eval_steps_per_second": 0.346,
327
+ "step": 180
328
+ },
329
+ {
330
+ "epoch": 0.1688888888888889,
331
+ "grad_norm": 56.81266784667969,
332
+ "learning_rate": 4.928917723494854e-06,
333
+ "logits/chosen": -0.10682469606399536,
334
+ "logits/rejected": -0.12124393880367279,
335
+ "logps/chosen": -3.058413028717041,
336
+ "logps/rejected": -55.052528381347656,
337
+ "loss": 0.2442,
338
+ "rewards/accuracies": 0.9500000476837158,
339
+ "rewards/chosen": 17.058589935302734,
340
+ "rewards/margins": 5.056097984313965,
341
+ "rewards/rejected": 12.002490043640137,
342
+ "step": 190
343
+ },
344
+ {
345
+ "epoch": 0.17777777777777778,
346
+ "grad_norm": 175.06552124023438,
347
+ "learning_rate": 4.909375468210947e-06,
348
+ "logits/chosen": -0.10520349442958832,
349
+ "logits/rejected": -0.12018950283527374,
350
+ "logps/chosen": -4.114959716796875,
351
+ "logps/rejected": -55.9394645690918,
352
+ "loss": 0.1915,
353
+ "rewards/accuracies": 0.9500000476837158,
354
+ "rewards/chosen": 16.98603057861328,
355
+ "rewards/margins": 5.105838775634766,
356
+ "rewards/rejected": 11.880191802978516,
357
+ "step": 200
358
+ },
359
+ {
360
+ "epoch": 0.18666666666666668,
361
+ "grad_norm": 78.06558990478516,
362
+ "learning_rate": 4.8875115010289655e-06,
363
+ "logits/chosen": -0.10475558042526245,
364
+ "logits/rejected": -0.11949175596237183,
365
+ "logps/chosen": -6.760301113128662,
366
+ "logps/rejected": -53.91607666015625,
367
+ "loss": 0.2843,
368
+ "rewards/accuracies": 0.9375,
369
+ "rewards/chosen": 16.857545852661133,
370
+ "rewards/margins": 4.917357921600342,
371
+ "rewards/rejected": 11.94018840789795,
372
+ "step": 210
373
+ },
374
+ {
375
+ "epoch": 0.19555555555555557,
376
+ "grad_norm": 15.880486488342285,
377
+ "learning_rate": 4.863346890409768e-06,
378
+ "logits/chosen": -0.11213523149490356,
379
+ "logits/rejected": -0.12581588327884674,
380
+ "logps/chosen": -6.759585380554199,
381
+ "logps/rejected": -51.10936737060547,
382
+ "loss": 0.5104,
383
+ "rewards/accuracies": 0.875,
384
+ "rewards/chosen": 16.859071731567383,
385
+ "rewards/margins": 4.638372898101807,
386
+ "rewards/rejected": 12.220698356628418,
387
+ "step": 220
388
+ },
389
+ {
390
+ "epoch": 0.20444444444444446,
391
+ "grad_norm": 46.97845458984375,
392
+ "learning_rate": 4.836904921750224e-06,
393
+ "logits/chosen": -0.11947059631347656,
394
+ "logits/rejected": -0.1329912692308426,
395
+ "logps/chosen": -3.608184814453125,
396
+ "logps/rejected": -48.794761657714844,
397
+ "loss": 0.2134,
398
+ "rewards/accuracies": 0.925000011920929,
399
+ "rewards/chosen": 17.235904693603516,
400
+ "rewards/margins": 4.859888076782227,
401
+ "rewards/rejected": 12.376014709472656,
402
+ "step": 230
403
+ },
404
+ {
405
+ "epoch": 0.21333333333333335,
406
+ "grad_norm": 24.032859802246094,
407
+ "learning_rate": 4.808211074945042e-06,
408
+ "logits/chosen": -0.1200513243675232,
409
+ "logits/rejected": -0.1333036869764328,
410
+ "logps/chosen": -3.7552154064178467,
411
+ "logps/rejected": -49.87453079223633,
412
+ "loss": 0.1781,
413
+ "rewards/accuracies": 0.9500000476837158,
414
+ "rewards/chosen": 17.094650268554688,
415
+ "rewards/margins": 4.68077278137207,
416
+ "rewards/rejected": 12.41387939453125,
417
+ "step": 240
418
+ },
419
+ {
420
+ "epoch": 0.21333333333333335,
421
+ "eval_logits/chosen": -0.12433278560638428,
422
+ "eval_logits/rejected": -0.13808581233024597,
423
+ "eval_logps/chosen": -4.408891201019287,
424
+ "eval_logps/rejected": -50.744781494140625,
425
+ "eval_loss": 0.1546352356672287,
426
+ "eval_rewards/accuracies": 0.9500000476837158,
427
+ "eval_rewards/chosen": 17.06200408935547,
428
+ "eval_rewards/margins": 4.775761604309082,
429
+ "eval_rewards/rejected": 12.286243438720703,
430
+ "eval_runtime": 361.4974,
431
+ "eval_samples_per_second": 2.766,
432
+ "eval_steps_per_second": 0.346,
433
+ "step": 240
434
+ },
435
+ {
436
+ "epoch": 0.2222222222222222,
437
+ "grad_norm": 0.25737640261650085,
438
+ "learning_rate": 4.7772929998339485e-06,
439
+ "logits/chosen": -0.12348780035972595,
440
+ "logits/rejected": -0.13704943656921387,
441
+ "logps/chosen": -4.4299187660217285,
442
+ "logps/rejected": -53.074607849121094,
443
+ "loss": 0.1373,
444
+ "rewards/accuracies": 0.9375,
445
+ "rewards/chosen": 17.087068557739258,
446
+ "rewards/margins": 5.06691837310791,
447
+ "rewards/rejected": 12.020149230957031,
448
+ "step": 250
449
+ },
450
+ {
451
+ "epoch": 0.2311111111111111,
452
+ "grad_norm": 0.1839389204978943,
453
+ "learning_rate": 4.744180489557859e-06,
454
+ "logits/chosen": -0.12177034467458725,
455
+ "logits/rejected": -0.1342695653438568,
456
+ "logps/chosen": -3.775188446044922,
457
+ "logps/rejected": -53.98720932006836,
458
+ "loss": 0.1896,
459
+ "rewards/accuracies": 0.949999988079071,
460
+ "rewards/chosen": 17.12021255493164,
461
+ "rewards/margins": 5.148064613342285,
462
+ "rewards/rejected": 11.972146987915039,
463
+ "step": 260
464
+ },
465
+ {
466
+ "epoch": 0.24,
467
+ "grad_norm": 12.258485794067383,
468
+ "learning_rate": 4.708905451849754e-06,
469
+ "logits/chosen": -0.11067859083414078,
470
+ "logits/rejected": -0.12377731502056122,
471
+ "logps/chosen": -6.418317794799805,
472
+ "logps/rejected": -56.57402801513672,
473
+ "loss": 0.2315,
474
+ "rewards/accuracies": 0.9375,
475
+ "rewards/chosen": 16.738832473754883,
476
+ "rewards/margins": 4.884931564331055,
477
+ "rewards/rejected": 11.853900909423828,
478
+ "step": 270
479
+ },
480
+ {
481
+ "epoch": 0.24888888888888888,
482
+ "grad_norm": 77.56194305419922,
483
+ "learning_rate": 4.671501878287879e-06,
484
+ "logits/chosen": -0.1184445172548294,
485
+ "logits/rejected": -0.1339874565601349,
486
+ "logps/chosen": -10.12116527557373,
487
+ "logps/rejected": -53.403907775878906,
488
+ "loss": 0.5343,
489
+ "rewards/accuracies": 0.862500011920929,
490
+ "rewards/chosen": 16.458633422851562,
491
+ "rewards/margins": 4.402472496032715,
492
+ "rewards/rejected": 12.056160926818848,
493
+ "step": 280
494
+ },
495
+ {
496
+ "epoch": 0.2577777777777778,
497
+ "grad_norm": 67.53883361816406,
498
+ "learning_rate": 4.6320058115409295e-06,
499
+ "logits/chosen": -0.1448262631893158,
500
+ "logits/rejected": -0.15793387591838837,
501
+ "logps/chosen": -3.4666190147399902,
502
+ "logps/rejected": -48.79213333129883,
503
+ "loss": 0.5017,
504
+ "rewards/accuracies": 0.887499988079071,
505
+ "rewards/chosen": 16.945899963378906,
506
+ "rewards/margins": 4.2686333656311035,
507
+ "rewards/rejected": 12.677268028259277,
508
+ "step": 290
509
+ },
510
+ {
511
+ "epoch": 0.26666666666666666,
512
+ "grad_norm": 0.17521341145038605,
513
+ "learning_rate": 4.590455310636778e-06,
514
+ "logits/chosen": -0.16128253936767578,
515
+ "logits/rejected": -0.17375555634498596,
516
+ "logps/chosen": -2.9032950401306152,
517
+ "logps/rejected": -47.69734191894531,
518
+ "loss": 0.265,
519
+ "rewards/accuracies": 0.925000011920929,
520
+ "rewards/chosen": 17.18383026123047,
521
+ "rewards/margins": 4.541309356689453,
522
+ "rewards/rejected": 12.642518997192383,
523
+ "step": 300
524
+ },
525
+ {
526
+ "epoch": 0.26666666666666666,
527
+ "eval_logits/chosen": -0.17444846034049988,
528
+ "eval_logits/rejected": -0.18559777736663818,
529
+ "eval_logps/chosen": -2.535512924194336,
530
+ "eval_logps/rejected": -47.16367721557617,
531
+ "eval_loss": 0.15360687673091888,
532
+ "eval_rewards/accuracies": 0.9440000653266907,
533
+ "eval_rewards/chosen": 17.249343872070312,
534
+ "eval_rewards/margins": 4.604989051818848,
535
+ "eval_rewards/rejected": 12.644353866577148,
536
+ "eval_runtime": 361.4575,
537
+ "eval_samples_per_second": 2.767,
538
+ "eval_steps_per_second": 0.346,
539
+ "step": 300
540
+ },
541
+ {
542
+ "epoch": 0.27555555555555555,
543
+ "grad_norm": 0.5040452480316162,
544
+ "learning_rate": 4.54689041428819e-06,
545
+ "logits/chosen": -0.16974106431007385,
546
+ "logits/rejected": -0.1810058057308197,
547
+ "logps/chosen": -1.233938217163086,
548
+ "logps/rejected": -49.907745361328125,
549
+ "loss": 0.1132,
550
+ "rewards/accuracies": 0.9500000476837158,
551
+ "rewards/chosen": 17.34117889404297,
552
+ "rewards/margins": 4.934173583984375,
553
+ "rewards/rejected": 12.407005310058594,
554
+ "step": 310
555
+ },
556
+ {
557
+ "epoch": 0.28444444444444444,
558
+ "grad_norm": 100.02949523925781,
559
+ "learning_rate": 4.501353102310901e-06,
560
+ "logits/chosen": -0.15705889463424683,
561
+ "logits/rejected": -0.1695334017276764,
562
+ "logps/chosen": -1.0820492506027222,
563
+ "logps/rejected": -52.577110290527344,
564
+ "loss": 0.1194,
565
+ "rewards/accuracies": 0.9500000476837158,
566
+ "rewards/chosen": 17.33388900756836,
567
+ "rewards/margins": 5.154760837554932,
568
+ "rewards/rejected": 12.179126739501953,
569
+ "step": 320
570
+ },
571
+ {
572
+ "epoch": 0.29333333333333333,
573
+ "grad_norm": 0.2689219117164612,
574
+ "learning_rate": 4.453887255171206e-06,
575
+ "logits/chosen": -0.13849371671676636,
576
+ "logits/rejected": -0.14990833401679993,
577
+ "logps/chosen": -1.8435032367706299,
578
+ "logps/rejected": -54.79044723510742,
579
+ "loss": 0.0926,
580
+ "rewards/accuracies": 0.9500000476837158,
581
+ "rewards/chosen": 17.2423095703125,
582
+ "rewards/margins": 5.28987979888916,
583
+ "rewards/rejected": 11.952428817749023,
584
+ "step": 330
585
+ },
586
+ {
587
+ "epoch": 0.3022222222222222,
588
+ "grad_norm": 0.09305431693792343,
589
+ "learning_rate": 4.404538611702055e-06,
590
+ "logits/chosen": -0.12299702316522598,
591
+ "logits/rejected": -0.13453055918216705,
592
+ "logps/chosen": -2.9897143840789795,
593
+ "logps/rejected": -52.954498291015625,
594
+ "loss": 0.2873,
595
+ "rewards/accuracies": 0.925000011920929,
596
+ "rewards/chosen": 17.17474365234375,
597
+ "rewards/margins": 5.071004867553711,
598
+ "rewards/rejected": 12.103739738464355,
599
+ "step": 340
600
+ },
601
+ {
602
+ "epoch": 0.3111111111111111,
603
+ "grad_norm": 59.282073974609375,
604
+ "learning_rate": 4.3533547250284015e-06,
605
+ "logits/chosen": -0.11913029849529266,
606
+ "logits/rejected": -0.12785324454307556,
607
+ "logps/chosen": -3.9456872940063477,
608
+ "logps/rejected": -48.68487548828125,
609
+ "loss": 0.4332,
610
+ "rewards/accuracies": 0.875,
611
+ "rewards/chosen": 17.12805938720703,
612
+ "rewards/margins": 4.669450283050537,
613
+ "rewards/rejected": 12.458610534667969,
614
+ "step": 350
615
+ },
616
+ {
617
+ "epoch": 0.32,
618
+ "grad_norm": 0.31101909279823303,
619
+ "learning_rate": 4.300384916744261e-06,
620
+ "logits/chosen": -0.11280188709497452,
621
+ "logits/rejected": -0.12300585210323334,
622
+ "logps/chosen": -2.1714723110198975,
623
+ "logps/rejected": -54.74174118041992,
624
+ "loss": 0.1605,
625
+ "rewards/accuracies": 0.9500000476837158,
626
+ "rewards/chosen": 17.326162338256836,
627
+ "rewards/margins": 5.467062473297119,
628
+ "rewards/rejected": 11.859098434448242,
629
+ "step": 360
630
+ },
631
+ {
632
+ "epoch": 0.32,
633
+ "eval_logits/chosen": -0.10620756447315216,
634
+ "eval_logits/rejected": -0.11727114766836166,
635
+ "eval_logps/chosen": -1.4165427684783936,
636
+ "eval_logps/rejected": -50.9525146484375,
637
+ "eval_loss": 0.3194349706172943,
638
+ "eval_rewards/accuracies": 0.9210000038146973,
639
+ "eval_rewards/chosen": 17.36124038696289,
640
+ "eval_rewards/margins": 5.095769882202148,
641
+ "eval_rewards/rejected": 12.26546859741211,
642
+ "eval_runtime": 361.5072,
643
+ "eval_samples_per_second": 2.766,
644
+ "eval_steps_per_second": 0.346,
645
+ "step": 360
646
+ },
647
+ {
648
+ "epoch": 0.3288888888888889,
649
+ "grad_norm": 6.1126532554626465,
650
+ "learning_rate": 4.24568022938566e-06,
651
+ "logits/chosen": -0.10354311764240265,
652
+ "logits/rejected": -0.11526636779308319,
653
+ "logps/chosen": -1.2935255765914917,
654
+ "logps/rejected": -55.57566833496094,
655
+ "loss": 0.1711,
656
+ "rewards/accuracies": 0.9500000476837158,
657
+ "rewards/chosen": 17.439346313476562,
658
+ "rewards/margins": 5.700921058654785,
659
+ "rewards/rejected": 11.738424301147461,
660
+ "step": 370
661
+ },
662
+ {
663
+ "epoch": 0.3377777777777778,
664
+ "grad_norm": 34.15927505493164,
665
+ "learning_rate": 4.189293377245241e-06,
666
+ "logits/chosen": -0.1029932051897049,
667
+ "logits/rejected": -0.11382515728473663,
668
+ "logps/chosen": -2.5132687091827393,
669
+ "logps/rejected": -55.50346374511719,
670
+ "loss": 0.4359,
671
+ "rewards/accuracies": 0.8875000476837158,
672
+ "rewards/chosen": 16.731037139892578,
673
+ "rewards/margins": 4.368172645568848,
674
+ "rewards/rejected": 12.362865447998047,
675
+ "step": 380
676
+ },
677
+ {
678
+ "epoch": 0.3466666666666667,
679
+ "grad_norm": 2.8422904014587402,
680
+ "learning_rate": 4.131278695575952e-06,
681
+ "logits/chosen": -0.10793520510196686,
682
+ "logits/rejected": -0.12109285593032837,
683
+ "logps/chosen": -3.014652729034424,
684
+ "logps/rejected": -53.98411560058594,
685
+ "loss": 0.2161,
686
+ "rewards/accuracies": 0.949999988079071,
687
+ "rewards/chosen": 17.137393951416016,
688
+ "rewards/margins": 5.105995178222656,
689
+ "rewards/rejected": 12.03139877319336,
690
+ "step": 390
691
+ },
692
+ {
693
+ "epoch": 0.35555555555555557,
694
+ "grad_norm": 54.0329475402832,
695
+ "learning_rate": 4.071692088232743e-06,
696
+ "logits/chosen": -0.10393750667572021,
697
+ "logits/rejected": -0.11834606528282166,
698
+ "logps/chosen": -2.1508543491363525,
699
+ "logps/rejected": -45.60733413696289,
700
+ "loss": 0.2077,
701
+ "rewards/accuracies": 0.925000011920929,
702
+ "rewards/chosen": 17.586124420166016,
703
+ "rewards/margins": 5.077212333679199,
704
+ "rewards/rejected": 12.5089111328125,
705
+ "step": 400
706
+ },
707
+ {
708
+ "epoch": 0.36444444444444446,
709
+ "grad_norm": 81.61144256591797,
710
+ "learning_rate": 4.010590973802737e-06,
711
+ "logits/chosen": -0.09564584493637085,
712
+ "logits/rejected": -0.10617707669734955,
713
+ "logps/chosen": -3.4572842121124268,
714
+ "logps/rejected": -50.92162322998047,
715
+ "loss": 0.2478,
716
+ "rewards/accuracies": 0.8875000476837158,
717
+ "rewards/chosen": 17.010910034179688,
718
+ "rewards/margins": 4.556198596954346,
719
+ "rewards/rejected": 12.454713821411133,
720
+ "step": 410
721
+ },
722
+ {
723
+ "epoch": 0.37333333333333335,
724
+ "grad_norm": 0.30974289774894714,
725
+ "learning_rate": 3.948034230275781e-06,
726
+ "logits/chosen": -0.09134417027235031,
727
+ "logits/rejected": -0.1020016297698021,
728
+ "logps/chosen": -5.046698570251465,
729
+ "logps/rejected": -48.908958435058594,
730
+ "loss": 0.2894,
731
+ "rewards/accuracies": 0.8999999761581421,
732
+ "rewards/chosen": 17.007888793945312,
733
+ "rewards/margins": 4.53641414642334,
734
+ "rewards/rejected": 12.471475601196289,
735
+ "step": 420
736
+ },
737
+ {
738
+ "epoch": 0.37333333333333335,
739
+ "eval_logits/chosen": -0.09054450690746307,
740
+ "eval_logits/rejected": -0.10264354199171066,
741
+ "eval_logps/chosen": -1.913105845451355,
742
+ "eval_logps/rejected": -51.11127471923828,
743
+ "eval_loss": 0.16789735853672028,
744
+ "eval_rewards/accuracies": 0.9450000524520874,
745
+ "eval_rewards/chosen": 17.311582565307617,
746
+ "eval_rewards/margins": 5.061989784240723,
747
+ "eval_rewards/rejected": 12.249593734741211,
748
+ "eval_runtime": 361.5337,
749
+ "eval_samples_per_second": 2.766,
750
+ "eval_steps_per_second": 0.346,
751
+ "step": 420
752
+ },
753
+ {
754
+ "epoch": 0.38222222222222224,
755
+ "grad_norm": 12.824393272399902,
756
+ "learning_rate": 3.884082138308699e-06,
757
+ "logits/chosen": -0.08666776865720749,
758
+ "logits/rejected": -0.0997733399271965,
759
+ "logps/chosen": -1.7306327819824219,
760
+ "logps/rejected": -54.273292541503906,
761
+ "loss": 0.2298,
762
+ "rewards/accuracies": 0.9500000476837158,
763
+ "rewards/chosen": 17.167621612548828,
764
+ "rewards/margins": 5.065673351287842,
765
+ "rewards/rejected": 12.101947784423828,
766
+ "step": 430
767
+ },
768
+ {
769
+ "epoch": 0.39111111111111113,
770
+ "grad_norm": 0.30713599920272827,
771
+ "learning_rate": 3.818796323137896e-06,
772
+ "logits/chosen": -0.09174907952547073,
773
+ "logits/rejected": -0.10376611351966858,
774
+ "logps/chosen": -1.489154577255249,
775
+ "logps/rejected": -54.580726623535156,
776
+ "loss": 0.2513,
777
+ "rewards/accuracies": 0.9375,
778
+ "rewards/chosen": 17.22280502319336,
779
+ "rewards/margins": 5.175349235534668,
780
+ "rewards/rejected": 12.047454833984375,
781
+ "step": 440
782
+ },
783
+ {
784
+ "epoch": 0.4,
785
+ "grad_norm": 87.4791488647461,
786
+ "learning_rate": 3.7522396951963303e-06,
787
+ "logits/chosen": -0.09688778221607208,
788
+ "logits/rejected": -0.10897806286811829,
789
+ "logps/chosen": -3.157695770263672,
790
+ "logps/rejected": -50.96417236328125,
791
+ "loss": 0.1758,
792
+ "rewards/accuracies": 0.9500000476837158,
793
+ "rewards/chosen": 17.345651626586914,
794
+ "rewards/margins": 5.245656967163086,
795
+ "rewards/rejected": 12.099993705749512,
796
+ "step": 450
797
+ },
798
+ {
799
+ "epoch": 0.4088888888888889,
800
+ "grad_norm": 146.2008056640625,
801
+ "learning_rate": 3.684476389492026e-06,
802
+ "logits/chosen": -0.09378582239151001,
803
+ "logits/rejected": -0.10475654900074005,
804
+ "logps/chosen": -0.5611928701400757,
805
+ "logps/rejected": -56.518890380859375,
806
+ "loss": 0.1981,
807
+ "rewards/accuracies": 0.9500000476837158,
808
+ "rewards/chosen": 17.113712310791016,
809
+ "rewards/margins": 5.068872928619385,
810
+ "rewards/rejected": 12.044839859008789,
811
+ "step": 460
812
+ },
813
+ {
814
+ "epoch": 0.4177777777777778,
815
+ "grad_norm": 1.9137721061706543,
816
+ "learning_rate": 3.6155717038065783e-06,
817
+ "logits/chosen": -0.08695463836193085,
818
+ "logits/rejected": -0.09596743434667587,
819
+ "logps/chosen": -1.5298550128936768,
820
+ "logps/rejected": -50.27445983886719,
821
+ "loss": 0.2066,
822
+ "rewards/accuracies": 0.9375,
823
+ "rewards/chosen": 17.35186004638672,
824
+ "rewards/margins": 5.014693260192871,
825
+ "rewards/rejected": 12.337167739868164,
826
+ "step": 470
827
+ },
828
+ {
829
+ "epoch": 0.4266666666666667,
830
+ "grad_norm": 84.80391693115234,
831
+ "learning_rate": 3.545592035773192e-06,
832
+ "logits/chosen": -0.0746893435716629,
833
+ "logits/rejected": -0.08653923869132996,
834
+ "logps/chosen": -2.0052125453948975,
835
+ "logps/rejected": -57.502811431884766,
836
+ "loss": 0.1149,
837
+ "rewards/accuracies": 0.9500000476837158,
838
+ "rewards/chosen": 17.14373016357422,
839
+ "rewards/margins": 5.360415935516357,
840
+ "rewards/rejected": 11.783313751220703,
841
+ "step": 480
842
+ },
843
+ {
844
+ "epoch": 0.4266666666666667,
845
+ "eval_logits/chosen": -0.07700399309396744,
846
+ "eval_logits/rejected": -0.08828537166118622,
847
+ "eval_logps/chosen": -4.48896598815918,
848
+ "eval_logps/rejected": -53.76282501220703,
849
+ "eval_loss": 0.29511645436286926,
850
+ "eval_rewards/accuracies": 0.9230000376701355,
851
+ "eval_rewards/chosen": 17.053997039794922,
852
+ "eval_rewards/margins": 5.069558143615723,
853
+ "eval_rewards/rejected": 11.984437942504883,
854
+ "eval_runtime": 361.5035,
855
+ "eval_samples_per_second": 2.766,
856
+ "eval_steps_per_second": 0.346,
857
+ "step": 480
858
+ },
859
+ {
860
+ "epoch": 0.43555555555555553,
861
+ "grad_norm": 82.9616470336914,
862
+ "learning_rate": 3.4746048188948806e-06,
863
+ "logits/chosen": -0.06675051152706146,
864
+ "logits/rejected": -0.07860895991325378,
865
+ "logps/chosen": -4.162237167358398,
866
+ "logps/rejected": -54.77789306640625,
867
+ "loss": 0.2979,
868
+ "rewards/accuracies": 0.9125000238418579,
869
+ "rewards/chosen": 17.047603607177734,
870
+ "rewards/margins": 5.138361930847168,
871
+ "rewards/rejected": 11.909242630004883,
872
+ "step": 490
873
+ },
874
+ {
875
+ "epoch": 0.4444444444444444,
876
+ "grad_norm": 0.04293210059404373,
877
+ "learning_rate": 3.4026784575644887e-06,
878
+ "logits/chosen": -0.06424491107463837,
879
+ "logits/rejected": -0.07567107677459717,
880
+ "logps/chosen": -2.05729603767395,
881
+ "logps/rejected": -56.646087646484375,
882
+ "loss": 0.4378,
883
+ "rewards/accuracies": 0.8875000476837158,
884
+ "rewards/chosen": 16.947803497314453,
885
+ "rewards/margins": 4.919981956481934,
886
+ "rewards/rejected": 12.02782154083252,
887
+ "step": 500
888
+ },
889
+ {
890
+ "epoch": 0.4533333333333333,
891
+ "grad_norm": 0.07136644423007965,
892
+ "learning_rate": 3.329882261149148e-06,
893
+ "logits/chosen": -0.06423303484916687,
894
+ "logits/rejected": -0.07512776553630829,
895
+ "logps/chosen": -3.1519265174865723,
896
+ "logps/rejected": -53.53008270263672,
897
+ "loss": 0.2613,
898
+ "rewards/accuracies": 0.9375,
899
+ "rewards/chosen": 17.112144470214844,
900
+ "rewards/margins": 5.046430587768555,
901
+ "rewards/rejected": 12.065712928771973,
902
+ "step": 510
903
+ },
904
+ {
905
+ "epoch": 0.4622222222222222,
906
+ "grad_norm": 0.29279613494873047,
907
+ "learning_rate": 3.25628637720269e-06,
908
+ "logits/chosen": -0.060233693569898605,
909
+ "logits/rejected": -0.07076811790466309,
910
+ "logps/chosen": -1.2358124256134033,
911
+ "logps/rejected": -50.61806106567383,
912
+ "loss": 0.1517,
913
+ "rewards/accuracies": 0.9375,
914
+ "rewards/chosen": 17.457351684570312,
915
+ "rewards/margins": 5.2062835693359375,
916
+ "rewards/rejected": 12.251070022583008,
917
+ "step": 520
918
+ },
919
+ {
920
+ "epoch": 0.4711111111111111,
921
+ "grad_norm": 0.7946074604988098,
922
+ "learning_rate": 3.181961723870359e-06,
923
+ "logits/chosen": -0.054482050240039825,
924
+ "logits/rejected": -0.0657092034816742,
925
+ "logps/chosen": -0.7682158946990967,
926
+ "logps/rejected": -58.81409454345703,
927
+ "loss": 0.2304,
928
+ "rewards/accuracies": 0.9375,
929
+ "rewards/chosen": 17.075973510742188,
930
+ "rewards/margins": 5.222441673278809,
931
+ "rewards/rejected": 11.853530883789062,
932
+ "step": 530
933
+ },
934
+ {
935
+ "epoch": 0.48,
936
+ "grad_norm": 3.1405210494995117,
937
+ "learning_rate": 3.1069799215509847e-06,
938
+ "logits/chosen": -0.05050881579518318,
939
+ "logits/rejected": -0.061149902641773224,
940
+ "logps/chosen": -0.954046368598938,
941
+ "logps/rejected": -52.70227813720703,
942
+ "loss": 0.0384,
943
+ "rewards/accuracies": 0.987500011920929,
944
+ "rewards/chosen": 17.378849029541016,
945
+ "rewards/margins": 5.253483295440674,
946
+ "rewards/rejected": 12.125364303588867,
947
+ "step": 540
948
+ },
949
+ {
950
+ "epoch": 0.48,
951
+ "eval_logits/chosen": -0.05120665580034256,
952
+ "eval_logits/rejected": -0.06123337894678116,
953
+ "eval_logps/chosen": -2.9872913360595703,
954
+ "eval_logps/rejected": -52.27314758300781,
955
+ "eval_loss": 0.17387841641902924,
956
+ "eval_rewards/accuracies": 0.9490000605583191,
957
+ "eval_rewards/chosen": 17.204164505004883,
958
+ "eval_rewards/margins": 5.070757865905762,
959
+ "eval_rewards/rejected": 12.133406639099121,
960
+ "eval_runtime": 361.5449,
961
+ "eval_samples_per_second": 2.766,
962
+ "eval_steps_per_second": 0.346,
963
+ "step": 540
964
+ },
965
+ {
966
+ "epoch": 0.4888888888888889,
967
+ "grad_norm": 0.07270358502864838,
968
+ "learning_rate": 3.0314132238824416e-06,
969
+ "logits/chosen": -0.05125313252210617,
970
+ "logits/rejected": -0.06174170970916748,
971
+ "logps/chosen": -3.2163877487182617,
972
+ "logps/rejected": -47.79279327392578,
973
+ "loss": 0.2087,
974
+ "rewards/accuracies": 0.9125000238418579,
975
+ "rewards/chosen": 17.408517837524414,
976
+ "rewards/margins": 5.061524391174316,
977
+ "rewards/rejected": 12.346992492675781,
978
+ "step": 550
979
+ },
980
+ {
981
+ "epoch": 0.49777777777777776,
982
+ "grad_norm": 0.10005924850702286,
983
+ "learning_rate": 2.955334448116915e-06,
984
+ "logits/chosen": -0.041773442178964615,
985
+ "logits/rejected": -0.05364570394158363,
986
+ "logps/chosen": -0.363404780626297,
987
+ "logps/rejected": -56.32415008544922,
988
+ "loss": 0.0969,
989
+ "rewards/accuracies": 0.9624999761581421,
990
+ "rewards/chosen": 17.3824462890625,
991
+ "rewards/margins": 5.5404510498046875,
992
+ "rewards/rejected": 11.841995239257812,
993
+ "step": 560
994
+ },
995
+ {
996
+ "epoch": 0.5066666666666667,
997
+ "grad_norm": 7.818356990814209,
998
+ "learning_rate": 2.8788169049530533e-06,
999
+ "logits/chosen": -0.04309462010860443,
1000
+ "logits/rejected": -0.05494442582130432,
1001
+ "logps/chosen": -2.2242724895477295,
1002
+ "logps/rejected": -56.444740295410156,
1003
+ "loss": 0.1447,
1004
+ "rewards/accuracies": 0.949999988079071,
1005
+ "rewards/chosen": 17.275390625,
1006
+ "rewards/margins": 5.55007791519165,
1007
+ "rewards/rejected": 11.725313186645508,
1008
+ "step": 570
1009
+ },
1010
+ {
1011
+ "epoch": 0.5155555555555555,
1012
+ "grad_norm": 0.03519747406244278,
1013
+ "learning_rate": 2.8019343278926397e-06,
1014
+ "logits/chosen": -0.03506368771195412,
1015
+ "logits/rejected": -0.046854715794324875,
1016
+ "logps/chosen": -0.5200096964836121,
1017
+ "logps/rejected": -59.05330276489258,
1018
+ "loss": 0.0998,
1019
+ "rewards/accuracies": 0.949999988079071,
1020
+ "rewards/chosen": 17.320554733276367,
1021
+ "rewards/margins": 5.727260589599609,
1022
+ "rewards/rejected": 11.593294143676758,
1023
+ "step": 580
1024
+ },
1025
+ {
1026
+ "epoch": 0.5244444444444445,
1027
+ "grad_norm": 0.04519123584032059,
1028
+ "learning_rate": 2.7247608021898265e-06,
1029
+ "logits/chosen": -0.03204537555575371,
1030
+ "logits/rejected": -0.04383891448378563,
1031
+ "logps/chosen": -1.1271060705184937,
1032
+ "logps/rejected": -59.308895111083984,
1033
+ "loss": 0.1162,
1034
+ "rewards/accuracies": 0.9750000238418579,
1035
+ "rewards/chosen": 17.256351470947266,
1036
+ "rewards/margins": 5.6881890296936035,
1037
+ "rewards/rejected": 11.568161010742188,
1038
+ "step": 590
1039
+ },
1040
+ {
1041
+ "epoch": 0.5333333333333333,
1042
+ "grad_norm": 0.028689857572317123,
1043
+ "learning_rate": 2.647370693461432e-06,
1044
+ "logits/chosen": -0.02834726870059967,
1045
+ "logits/rejected": -0.03827046602964401,
1046
+ "logps/chosen": -5.673943519592285,
1047
+ "logps/rejected": -55.72624588012695,
1048
+ "loss": 0.4008,
1049
+ "rewards/accuracies": 0.8875000476837158,
1050
+ "rewards/chosen": 16.761056900024414,
1051
+ "rewards/margins": 4.804043769836426,
1052
+ "rewards/rejected": 11.957012176513672,
1053
+ "step": 600
1054
+ },
1055
+ {
1056
+ "epoch": 0.5333333333333333,
1057
+ "eval_logits/chosen": -0.035840023308992386,
1058
+ "eval_logits/rejected": -0.04687971621751785,
1059
+ "eval_logps/chosen": -2.1760435104370117,
1060
+ "eval_logps/rejected": -56.62664031982422,
1061
+ "eval_loss": 0.17061151564121246,
1062
+ "eval_rewards/accuracies": 0.9470000267028809,
1063
+ "eval_rewards/chosen": 17.285289764404297,
1064
+ "eval_rewards/margins": 5.587231636047363,
1065
+ "eval_rewards/rejected": 11.698057174682617,
1066
+ "eval_runtime": 361.5056,
1067
+ "eval_samples_per_second": 2.766,
1068
+ "eval_steps_per_second": 0.346,
1069
+ "step": 600
1070
+ },
1071
+ {
1072
+ "epoch": 0.5422222222222223,
1073
+ "grad_norm": 1.1994622945785522,
1074
+ "learning_rate": 2.569838576027068e-06,
1075
+ "logits/chosen": -0.03231767192482948,
1076
+ "logits/rejected": -0.04397805407643318,
1077
+ "logps/chosen": -1.5904741287231445,
1078
+ "logps/rejected": -62.10520553588867,
1079
+ "loss": 0.1265,
1080
+ "rewards/accuracies": 0.9624999761581421,
1081
+ "rewards/chosen": 17.11206817626953,
1082
+ "rewards/margins": 5.734784126281738,
1083
+ "rewards/rejected": 11.37728500366211,
1084
+ "step": 610
1085
+ },
1086
+ {
1087
+ "epoch": 0.5511111111111111,
1088
+ "grad_norm": 0.14163845777511597,
1089
+ "learning_rate": 2.4922391610481544e-06,
1090
+ "logits/chosen": -0.03293662518262863,
1091
+ "logits/rejected": -0.04335154965519905,
1092
+ "logps/chosen": -1.3568997383117676,
1093
+ "logps/rejected": -58.347434997558594,
1094
+ "loss": 0.097,
1095
+ "rewards/accuracies": 0.9624999761581421,
1096
+ "rewards/chosen": 17.345399856567383,
1097
+ "rewards/margins": 5.823373794555664,
1098
+ "rewards/rejected": 11.522026062011719,
1099
+ "step": 620
1100
+ },
1101
+ {
1102
+ "epoch": 0.56,
1103
+ "grad_norm": 1.125027060508728,
1104
+ "learning_rate": 2.4146472245350804e-06,
1105
+ "logits/chosen": -0.02864963933825493,
1106
+ "logits/rejected": -0.03835710883140564,
1107
+ "logps/chosen": -2.494175434112549,
1108
+ "logps/rejected": -55.33067321777344,
1109
+ "loss": 0.3115,
1110
+ "rewards/accuracies": 0.925000011920929,
1111
+ "rewards/chosen": 17.262205123901367,
1112
+ "rewards/margins": 5.4287261962890625,
1113
+ "rewards/rejected": 11.833479881286621,
1114
+ "step": 630
1115
+ },
1116
+ {
1117
+ "epoch": 0.5688888888888889,
1118
+ "grad_norm": 0.03514016419649124,
1119
+ "learning_rate": 2.337137535291868e-06,
1120
+ "logits/chosen": -0.02757749892771244,
1121
+ "logits/rejected": -0.0376611053943634,
1122
+ "logps/chosen": -2.3163387775421143,
1123
+ "logps/rejected": -53.54579162597656,
1124
+ "loss": 0.1268,
1125
+ "rewards/accuracies": 0.9500000476837158,
1126
+ "rewards/chosen": 17.542863845825195,
1127
+ "rewards/margins": 5.799897193908691,
1128
+ "rewards/rejected": 11.742965698242188,
1129
+ "step": 640
1130
+ },
1131
+ {
1132
+ "epoch": 0.5777777777777777,
1133
+ "grad_norm": 34.20791244506836,
1134
+ "learning_rate": 2.259784782867782e-06,
1135
+ "logits/chosen": -0.02032250165939331,
1136
+ "logits/rejected": -0.031542714685201645,
1137
+ "logps/chosen": -1.7910137176513672,
1138
+ "logps/rejected": -62.980018615722656,
1139
+ "loss": 0.1831,
1140
+ "rewards/accuracies": 0.9500000476837158,
1141
+ "rewards/chosen": 16.982879638671875,
1142
+ "rewards/margins": 5.589078903198242,
1143
+ "rewards/rejected": 11.393800735473633,
1144
+ "step": 650
1145
+ },
1146
+ {
1147
+ "epoch": 0.5866666666666667,
1148
+ "grad_norm": 32.356956481933594,
1149
+ "learning_rate": 2.182663505585314e-06,
1150
+ "logits/chosen": -0.01711965538561344,
1151
+ "logits/rejected": -0.028038471937179565,
1152
+ "logps/chosen": -2.662904739379883,
1153
+ "logps/rejected": -63.326297760009766,
1154
+ "loss": 0.1678,
1155
+ "rewards/accuracies": 0.9624999761581421,
1156
+ "rewards/chosen": 16.997777938842773,
1157
+ "rewards/margins": 5.741157531738281,
1158
+ "rewards/rejected": 11.256620407104492,
1159
+ "step": 660
1160
+ },
1161
+ {
1162
+ "epoch": 0.5866666666666667,
1163
+ "eval_logits/chosen": -0.016018809750676155,
1164
+ "eval_logits/rejected": -0.027020033448934555,
1165
+ "eval_logps/chosen": -3.00822114944458,
1166
+ "eval_logps/rejected": -57.951629638671875,
1167
+ "eval_loss": 0.2050127536058426,
1168
+ "eval_rewards/accuracies": 0.9450000524520874,
1169
+ "eval_rewards/chosen": 17.202072143554688,
1170
+ "eval_rewards/margins": 5.636512756347656,
1171
+ "eval_rewards/rejected": 11.565557479858398,
1172
+ "eval_runtime": 361.5073,
1173
+ "eval_samples_per_second": 2.766,
1174
+ "eval_steps_per_second": 0.346,
1175
+ "step": 660
1176
+ },
1177
+ {
1178
+ "epoch": 0.5955555555555555,
1179
+ "grad_norm": 86.53874969482422,
1180
+ "learning_rate": 2.1058480187138863e-06,
1181
+ "logits/chosen": -0.012352555990219116,
1182
+ "logits/rejected": -0.023966707289218903,
1183
+ "logps/chosen": -2.129209041595459,
1184
+ "logps/rejected": -62.1393928527832,
1185
+ "loss": 0.1025,
1186
+ "rewards/accuracies": 0.9624999761581421,
1187
+ "rewards/chosen": 17.069538116455078,
1188
+ "rewards/margins": 5.678930759429932,
1189
+ "rewards/rejected": 11.390605926513672,
1190
+ "step": 670
1191
+ },
1192
+ {
1193
+ "epoch": 0.6044444444444445,
1194
+ "grad_norm": 2.892672300338745,
1195
+ "learning_rate": 2.0294123428584985e-06,
1196
+ "logits/chosen": -0.011562807485461235,
1197
+ "logits/rejected": -0.020859256386756897,
1198
+ "logps/chosen": -3.2486608028411865,
1199
+ "logps/rejected": -59.56721496582031,
1200
+ "loss": 0.1961,
1201
+ "rewards/accuracies": 0.9500000476837158,
1202
+ "rewards/chosen": 17.051376342773438,
1203
+ "rewards/margins": 5.515361785888672,
1204
+ "rewards/rejected": 11.536016464233398,
1205
+ "step": 680
1206
+ },
1207
+ {
1208
+ "epoch": 0.6133333333333333,
1209
+ "grad_norm": 30.26588249206543,
1210
+ "learning_rate": 1.953430132632311e-06,
1211
+ "logits/chosen": -0.011488726362586021,
1212
+ "logits/rejected": -0.021527227014303207,
1213
+ "logps/chosen": -1.794357180595398,
1214
+ "logps/rejected": -60.649505615234375,
1215
+ "loss": 0.2821,
1216
+ "rewards/accuracies": 0.9375,
1217
+ "rewards/chosen": 17.114843368530273,
1218
+ "rewards/margins": 5.612217903137207,
1219
+ "rewards/rejected": 11.50262451171875,
1220
+ "step": 690
1221
+ },
1222
+ {
1223
+ "epoch": 0.6222222222222222,
1224
+ "grad_norm": 0.030314341187477112,
1225
+ "learning_rate": 1.8779746056819104e-06,
1226
+ "logits/chosen": -0.014436552301049232,
1227
+ "logits/rejected": -0.026044374331831932,
1228
+ "logps/chosen": -3.1617178916931152,
1229
+ "logps/rejected": -60.94548797607422,
1230
+ "loss": 0.3141,
1231
+ "rewards/accuracies": 0.9375,
1232
+ "rewards/chosen": 17.08698272705078,
1233
+ "rewards/margins": 5.719264984130859,
1234
+ "rewards/rejected": 11.367716789245605,
1235
+ "step": 700
1236
+ },
1237
+ {
1238
+ "epoch": 0.6311111111111111,
1239
+ "grad_norm": 0.06711317598819733,
1240
+ "learning_rate": 1.8031184721336364e-06,
1241
+ "logits/chosen": -0.017312290146946907,
1242
+ "logits/rejected": -0.026554957032203674,
1243
+ "logps/chosen": -5.120705604553223,
1244
+ "logps/rejected": -54.33483123779297,
1245
+ "loss": 0.3827,
1246
+ "rewards/accuracies": 0.887499988079071,
1247
+ "rewards/chosen": 17.1247501373291,
1248
+ "rewards/margins": 5.327882766723633,
1249
+ "rewards/rejected": 11.796867370605469,
1250
+ "step": 710
1251
+ },
1252
+ {
1253
+ "epoch": 0.64,
1254
+ "grad_norm": 60.36637496948242,
1255
+ "learning_rate": 1.7289338645289711e-06,
1256
+ "logits/chosen": -0.01987219974398613,
1257
+ "logits/rejected": -0.02766304835677147,
1258
+ "logps/chosen": -2.1757986545562744,
1259
+ "logps/rejected": -54.02531433105469,
1260
+ "loss": 0.2272,
1261
+ "rewards/accuracies": 0.949999988079071,
1262
+ "rewards/chosen": 17.239776611328125,
1263
+ "rewards/margins": 5.250313758850098,
1264
+ "rewards/rejected": 11.989462852478027,
1265
+ "step": 720
1266
+ },
1267
+ {
1268
+ "epoch": 0.64,
1269
+ "eval_logits/chosen": -0.022905193269252777,
1270
+ "eval_logits/rejected": -0.0321992002427578,
1271
+ "eval_logps/chosen": -1.100506067276001,
1272
+ "eval_logps/rejected": -55.91169738769531,
1273
+ "eval_loss": 0.14018221199512482,
1274
+ "eval_rewards/accuracies": 0.9520000219345093,
1275
+ "eval_rewards/chosen": 17.392841339111328,
1276
+ "eval_rewards/margins": 5.623291492462158,
1277
+ "eval_rewards/rejected": 11.769551277160645,
1278
+ "eval_runtime": 361.416,
1279
+ "eval_samples_per_second": 2.767,
1280
+ "eval_steps_per_second": 0.346,
1281
+ "step": 720
1282
+ },
1283
+ {
1284
+ "epoch": 0.6488888888888888,
1285
+ "grad_norm": 0.029294608160853386,
1286
+ "learning_rate": 1.6554922683164875e-06,
1287
+ "logits/chosen": -0.016416028141975403,
1288
+ "logits/rejected": -0.025527067482471466,
1289
+ "logps/chosen": -0.906692385673523,
1290
+ "logps/rejected": -59.142173767089844,
1291
+ "loss": 0.1171,
1292
+ "rewards/accuracies": 0.9624999761581421,
1293
+ "rewards/chosen": 17.282169342041016,
1294
+ "rewards/margins": 5.6978583335876465,
1295
+ "rewards/rejected": 11.584310531616211,
1296
+ "step": 730
1297
+ },
1298
+ {
1299
+ "epoch": 0.6577777777777778,
1300
+ "grad_norm": 33.89070510864258,
1301
+ "learning_rate": 1.5828644529673592e-06,
1302
+ "logits/chosen": -0.016044551506638527,
1303
+ "logits/rejected": -0.025811903178691864,
1304
+ "logps/chosen": -1.1167538166046143,
1305
+ "logps/rejected": -59.84492492675781,
1306
+ "loss": 0.2001,
1307
+ "rewards/accuracies": 0.9624999761581421,
1308
+ "rewards/chosen": 17.266983032226562,
1309
+ "rewards/margins": 5.749438285827637,
1310
+ "rewards/rejected": 11.51754379272461,
1311
+ "step": 740
1312
+ },
1313
+ {
1314
+ "epoch": 0.6666666666666666,
1315
+ "grad_norm": 0.53726726770401,
1316
+ "learning_rate": 1.5111204037807844e-06,
1317
+ "logits/chosen": -0.012529855594038963,
1318
+ "logits/rejected": -0.023765765130519867,
1319
+ "logps/chosen": -0.3461765646934509,
1320
+ "logps/rejected": -55.841102600097656,
1321
+ "loss": 0.1692,
1322
+ "rewards/accuracies": 0.9375,
1323
+ "rewards/chosen": 17.61865997314453,
1324
+ "rewards/margins": 6.019055366516113,
1325
+ "rewards/rejected": 11.599604606628418,
1326
+ "step": 750
1327
+ },
1328
+ {
1329
+ "epoch": 0.6755555555555556,
1330
+ "grad_norm": 0.7173987627029419,
1331
+ "learning_rate": 1.4403292544450625e-06,
1332
+ "logits/chosen": -0.013380522839725018,
1333
+ "logits/rejected": -0.022107835859060287,
1334
+ "logps/chosen": -3.2759666442871094,
1335
+ "logps/rejected": -53.836822509765625,
1336
+ "loss": 0.2418,
1337
+ "rewards/accuracies": 0.949999988079071,
1338
+ "rewards/chosen": 17.383333206176758,
1339
+ "rewards/margins": 5.590358734130859,
1340
+ "rewards/rejected": 11.792974472045898,
1341
+ "step": 760
1342
+ },
1343
+ {
1344
+ "epoch": 0.6844444444444444,
1345
+ "grad_norm": 2.744900941848755,
1346
+ "learning_rate": 1.3705592204192853e-06,
1347
+ "logits/chosen": -0.011318420059978962,
1348
+ "logits/rejected": -0.021840626373887062,
1349
+ "logps/chosen": -2.592001438140869,
1350
+ "logps/rejected": -57.06926345825195,
1351
+ "loss": 0.1749,
1352
+ "rewards/accuracies": 0.9500000476837158,
1353
+ "rewards/chosen": 17.286128997802734,
1354
+ "rewards/margins": 5.662715911865234,
1355
+ "rewards/rejected": 11.623414993286133,
1356
+ "step": 770
1357
+ },
1358
+ {
1359
+ "epoch": 0.6933333333333334,
1360
+ "grad_norm": 70.76551818847656,
1361
+ "learning_rate": 1.301877533199859e-06,
1362
+ "logits/chosen": -0.013944407925009727,
1363
+ "logits/rejected": -0.025667501613497734,
1364
+ "logps/chosen": -2.1070234775543213,
1365
+ "logps/rejected": -57.72039794921875,
1366
+ "loss": 0.1915,
1367
+ "rewards/accuracies": 0.925000011920929,
1368
+ "rewards/chosen": 17.28545379638672,
1369
+ "rewards/margins": 5.708344459533691,
1370
+ "rewards/rejected": 11.577108383178711,
1371
+ "step": 780
1372
+ },
1373
+ {
1374
+ "epoch": 0.6933333333333334,
1375
+ "eval_logits/chosen": -0.01664295792579651,
1376
+ "eval_logits/rejected": -0.026626665145158768,
1377
+ "eval_logps/chosen": -1.082255244255066,
1378
+ "eval_logps/rejected": -55.95073699951172,
1379
+ "eval_loss": 0.24412688612937927,
1380
+ "eval_rewards/accuracies": 0.9320000410079956,
1381
+ "eval_rewards/chosen": 17.394668579101562,
1382
+ "eval_rewards/margins": 5.629020690917969,
1383
+ "eval_rewards/rejected": 11.765647888183594,
1384
+ "eval_runtime": 361.4602,
1385
+ "eval_samples_per_second": 2.767,
1386
+ "eval_steps_per_second": 0.346,
1387
+ "step": 780
1388
+ },
1389
+ {
1390
+ "epoch": 0.7022222222222222,
1391
+ "grad_norm": 0.41359376907348633,
1392
+ "learning_rate": 1.2343503755351729e-06,
1393
+ "logits/chosen": -0.012772129848599434,
1394
+ "logits/rejected": -0.023186586797237396,
1395
+ "logps/chosen": -0.8030359148979187,
1396
+ "logps/rejected": -57.84947967529297,
1397
+ "loss": 0.2143,
1398
+ "rewards/accuracies": 0.9375,
1399
+ "rewards/chosen": 17.32242202758789,
1400
+ "rewards/margins": 5.645486831665039,
1401
+ "rewards/rejected": 11.676933288574219,
1402
+ "step": 790
1403
+ },
1404
+ {
1405
+ "epoch": 0.7111111111111111,
1406
+ "grad_norm": 0.41748157143592834,
1407
+ "learning_rate": 1.168042817650881e-06,
1408
+ "logits/chosen": -0.012502101249992847,
1409
+ "logits/rejected": -0.023272844031453133,
1410
+ "logps/chosen": -1.0377256870269775,
1411
+ "logps/rejected": -57.05615997314453,
1412
+ "loss": 0.0792,
1413
+ "rewards/accuracies": 0.9750000238418579,
1414
+ "rewards/chosen": 17.60501480102539,
1415
+ "rewards/margins": 6.159370422363281,
1416
+ "rewards/rejected": 11.445646286010742,
1417
+ "step": 800
1418
+ },
1419
+ {
1420
+ "epoch": 0.72,
1421
+ "grad_norm": 0.039210401475429535,
1422
+ "learning_rate": 1.1030187545472012e-06,
1423
+ "logits/chosen": -0.008177272044122219,
1424
+ "logits/rejected": -0.01832464337348938,
1425
+ "logps/chosen": -3.8701748847961426,
1426
+ "logps/rejected": -57.661476135253906,
1427
+ "loss": 0.3177,
1428
+ "rewards/accuracies": 0.925000011920929,
1429
+ "rewards/chosen": 17.14336395263672,
1430
+ "rewards/margins": 5.583393573760986,
1431
+ "rewards/rejected": 11.55997085571289,
1432
+ "step": 810
1433
+ },
1434
+ {
1435
+ "epoch": 0.7288888888888889,
1436
+ "grad_norm": 1.6096951961517334,
1437
+ "learning_rate": 1.0393408444287048e-06,
1438
+ "logits/chosen": -0.006832236424088478,
1439
+ "logits/rejected": -0.01682097464799881,
1440
+ "logps/chosen": -2.1741790771484375,
1441
+ "logps/rejected": -57.273414611816406,
1442
+ "loss": 0.2128,
1443
+ "rewards/accuracies": 0.949999988079071,
1444
+ "rewards/chosen": 17.426712036132812,
1445
+ "rewards/margins": 5.911205291748047,
1446
+ "rewards/rejected": 11.515506744384766,
1447
+ "step": 820
1448
+ },
1449
+ {
1450
+ "epoch": 0.7377777777777778,
1451
+ "grad_norm": 3.7820959091186523,
1452
+ "learning_rate": 9.770704483258782e-07,
1453
+ "logits/chosen": -0.009998206980526447,
1454
+ "logits/rejected": -0.0204261876642704,
1455
+ "logps/chosen": -1.9802953004837036,
1456
+ "logps/rejected": -56.901512145996094,
1457
+ "loss": 0.1296,
1458
+ "rewards/accuracies": 0.9750000238418579,
1459
+ "rewards/chosen": 17.56071662902832,
1460
+ "rewards/margins": 6.169583320617676,
1461
+ "rewards/rejected": 11.391134262084961,
1462
+ "step": 830
1463
+ },
1464
+ {
1465
+ "epoch": 0.7466666666666667,
1466
+ "grad_norm": 1.126626968383789,
1467
+ "learning_rate": 9.162675709666865e-07,
1468
+ "logits/chosen": -0.00826224498450756,
1469
+ "logits/rejected": -0.018977787345647812,
1470
+ "logps/chosen": -1.4256607294082642,
1471
+ "logps/rejected": -61.65986633300781,
1472
+ "loss": 0.0635,
1473
+ "rewards/accuracies": 0.987500011920929,
1474
+ "rewards/chosen": 17.324975967407227,
1475
+ "rewards/margins": 6.09440803527832,
1476
+ "rewards/rejected": 11.230567932128906,
1477
+ "step": 840
1478
+ },
1479
+ {
1480
+ "epoch": 0.7466666666666667,
1481
+ "eval_logits/chosen": -0.01111944392323494,
1482
+ "eval_logits/rejected": -0.021697774529457092,
1483
+ "eval_logps/chosen": -1.2168633937835693,
1484
+ "eval_logps/rejected": -58.2642822265625,
1485
+ "eval_loss": 0.1689341962337494,
1486
+ "eval_rewards/accuracies": 0.9450000524520874,
1487
+ "eval_rewards/chosen": 17.381206512451172,
1488
+ "eval_rewards/margins": 5.846914291381836,
1489
+ "eval_rewards/rejected": 11.534292221069336,
1490
+ "eval_runtime": 361.6192,
1491
+ "eval_samples_per_second": 2.765,
1492
+ "eval_steps_per_second": 0.346,
1493
+ "step": 840
1494
+ },
1495
+ {
1496
+ "epoch": 0.7555555555555555,
1497
+ "grad_norm": 5.250723838806152,
1498
+ "learning_rate": 8.569908029550686e-07,
1499
+ "logits/chosen": -0.006854387000203133,
1500
+ "logits/rejected": -0.018336206674575806,
1501
+ "logps/chosen": -0.6238930821418762,
1502
+ "logps/rejected": -60.925689697265625,
1503
+ "loss": 0.1157,
1504
+ "rewards/accuracies": 0.9750000238418579,
1505
+ "rewards/chosen": 17.443281173706055,
1506
+ "rewards/margins": 6.164813995361328,
1507
+ "rewards/rejected": 11.278467178344727,
1508
+ "step": 850
1509
+ },
1510
+ {
1511
+ "epoch": 0.7644444444444445,
1512
+ "grad_norm": 3.1401162147521973,
1513
+ "learning_rate": 7.992972643121227e-07,
1514
+ "logits/chosen": -0.0037835021503269672,
1515
+ "logits/rejected": -0.013135241344571114,
1516
+ "logps/chosen": -0.8492221832275391,
1517
+ "logps/rejected": -55.516075134277344,
1518
+ "loss": 0.2252,
1519
+ "rewards/accuracies": 0.9375,
1520
+ "rewards/chosen": 17.488588333129883,
1521
+ "rewards/margins": 5.736725807189941,
1522
+ "rewards/rejected": 11.751862525939941,
1523
+ "step": 860
1524
+ },
1525
+ {
1526
+ "epoch": 0.7733333333333333,
1527
+ "grad_norm": 55.528812408447266,
1528
+ "learning_rate": 7.432425494343509e-07,
1529
+ "logits/chosen": -0.0033687639515846968,
1530
+ "logits/rejected": -0.013152632862329483,
1531
+ "logps/chosen": -1.3188884258270264,
1532
+ "logps/rejected": -57.9510498046875,
1533
+ "loss": 0.1398,
1534
+ "rewards/accuracies": 0.9624999761581421,
1535
+ "rewards/chosen": 17.412578582763672,
1536
+ "rewards/margins": 5.868515968322754,
1537
+ "rewards/rejected": 11.544061660766602,
1538
+ "step": 870
1539
+ },
1540
+ {
1541
+ "epoch": 0.7822222222222223,
1542
+ "grad_norm": 0.039824869483709335,
1543
+ "learning_rate": 6.888806735220396e-07,
1544
+ "logits/chosen": -0.0010406378423795104,
1545
+ "logits/rejected": -0.012095071375370026,
1546
+ "logps/chosen": -2.0619027614593506,
1547
+ "logps/rejected": -59.65806579589844,
1548
+ "loss": 0.2966,
1549
+ "rewards/accuracies": 0.9375,
1550
+ "rewards/chosen": 17.192535400390625,
1551
+ "rewards/margins": 5.7006731033325195,
1552
+ "rewards/rejected": 11.491861343383789,
1553
+ "step": 880
1554
+ },
1555
+ {
1556
+ "epoch": 0.7911111111111111,
1557
+ "grad_norm": 0.7536466717720032,
1558
+ "learning_rate": 6.362640205293583e-07,
1559
+ "logits/chosen": -0.0016857212176546454,
1560
+ "logits/rejected": -0.010936147533357143,
1561
+ "logps/chosen": -2.1478958129882812,
1562
+ "logps/rejected": -58.20386505126953,
1563
+ "loss": 0.1641,
1564
+ "rewards/accuracies": 0.9375,
1565
+ "rewards/chosen": 17.243247985839844,
1566
+ "rewards/margins": 5.678771018981934,
1567
+ "rewards/rejected": 11.564477920532227,
1568
+ "step": 890
1569
+ },
1570
+ {
1571
+ "epoch": 0.8,
1572
+ "grad_norm": 0.3857377767562866,
1573
+ "learning_rate": 5.854432926863684e-07,
1574
+ "logits/chosen": 0.00038508616853505373,
1575
+ "logits/rejected": -0.011034643277525902,
1576
+ "logps/chosen": -1.606274127960205,
1577
+ "logps/rejected": -62.58662414550781,
1578
+ "loss": 0.1703,
1579
+ "rewards/accuracies": 0.9624999761581421,
1580
+ "rewards/chosen": 17.211261749267578,
1581
+ "rewards/margins": 5.966868877410889,
1582
+ "rewards/rejected": 11.244392395019531,
1583
+ "step": 900
1584
+ },
1585
+ {
1586
+ "epoch": 0.8,
1587
+ "eval_logits/chosen": 0.00021816430671606213,
1588
+ "eval_logits/rejected": -0.010477552190423012,
1589
+ "eval_logps/chosen": -1.7576563358306885,
1590
+ "eval_logps/rejected": -59.79063415527344,
1591
+ "eval_loss": 0.13995186984539032,
1592
+ "eval_rewards/accuracies": 0.9610000848770142,
1593
+ "eval_rewards/chosen": 17.327129364013672,
1594
+ "eval_rewards/margins": 5.945469856262207,
1595
+ "eval_rewards/rejected": 11.381658554077148,
1596
+ "eval_runtime": 361.4592,
1597
+ "eval_samples_per_second": 2.767,
1598
+ "eval_steps_per_second": 0.346,
1599
+ "step": 900
1600
+ },
1601
+ {
1602
+ "epoch": 0.8088888888888889,
1603
+ "grad_norm": 0.13143697381019592,
1604
+ "learning_rate": 5.364674616415547e-07,
1605
+ "logits/chosen": 0.0005570838693529367,
1606
+ "logits/rejected": -0.011198626831173897,
1607
+ "logps/chosen": -0.12791283428668976,
1608
+ "logps/rejected": -65.7052993774414,
1609
+ "loss": 0.0507,
1610
+ "rewards/accuracies": 0.9750000238418579,
1611
+ "rewards/chosen": 17.224443435668945,
1612
+ "rewards/margins": 6.163690567016602,
1613
+ "rewards/rejected": 11.060752868652344,
1614
+ "step": 910
1615
+ },
1616
+ {
1617
+ "epoch": 0.8177777777777778,
1618
+ "grad_norm": 102.9654541015625,
1619
+ "learning_rate": 4.893837212719859e-07,
1620
+ "logits/chosen": -0.0008557128603570163,
1621
+ "logits/rejected": -0.01163212489336729,
1622
+ "logps/chosen": -1.3292646408081055,
1623
+ "logps/rejected": -61.644893646240234,
1624
+ "loss": 0.0881,
1625
+ "rewards/accuracies": 0.9500000476837158,
1626
+ "rewards/chosen": 17.276538848876953,
1627
+ "rewards/margins": 5.96080207824707,
1628
+ "rewards/rejected": 11.3157377243042,
1629
+ "step": 920
1630
+ },
1631
+ {
1632
+ "epoch": 0.8266666666666667,
1633
+ "grad_norm": 146.04498291015625,
1634
+ "learning_rate": 4.442374422065493e-07,
1635
+ "logits/chosen": 0.002922601066529751,
1636
+ "logits/rejected": -0.007130052894353867,
1637
+ "logps/chosen": -1.6069023609161377,
1638
+ "logps/rejected": -61.52588653564453,
1639
+ "loss": 0.1373,
1640
+ "rewards/accuracies": 0.949999988079071,
1641
+ "rewards/chosen": 17.184974670410156,
1642
+ "rewards/margins": 5.7995734214782715,
1643
+ "rewards/rejected": 11.38540267944336,
1644
+ "step": 930
1645
+ },
1646
+ {
1647
+ "epoch": 0.8355555555555556,
1648
+ "grad_norm": 23.539485931396484,
1649
+ "learning_rate": 4.0107212810610974e-07,
1650
+ "logits/chosen": 0.0018056132830679417,
1651
+ "logits/rejected": -0.007847340777516365,
1652
+ "logps/chosen": -3.637047290802002,
1653
+ "logps/rejected": -61.21245574951172,
1654
+ "loss": 0.2763,
1655
+ "rewards/accuracies": 0.925000011920929,
1656
+ "rewards/chosen": 16.960758209228516,
1657
+ "rewards/margins": 5.551811695098877,
1658
+ "rewards/rejected": 11.40894603729248,
1659
+ "step": 940
1660
+ },
1661
+ {
1662
+ "epoch": 0.8444444444444444,
1663
+ "grad_norm": 31.52926254272461,
1664
+ "learning_rate": 3.599293737426932e-07,
1665
+ "logits/chosen": 0.0032081177923828363,
1666
+ "logits/rejected": -0.007756482809782028,
1667
+ "logps/chosen": -1.172515869140625,
1668
+ "logps/rejected": -66.5853271484375,
1669
+ "loss": 0.1758,
1670
+ "rewards/accuracies": 0.949999988079071,
1671
+ "rewards/chosen": 16.992876052856445,
1672
+ "rewards/margins": 5.886469841003418,
1673
+ "rewards/rejected": 11.106407165527344,
1674
+ "step": 950
1675
+ },
1676
+ {
1677
+ "epoch": 0.8533333333333334,
1678
+ "grad_norm": 0.07474468648433685,
1679
+ "learning_rate": 3.208488249181216e-07,
1680
+ "logits/chosen": 0.0022508346009999514,
1681
+ "logits/rejected": -0.009156409651041031,
1682
+ "logps/chosen": -0.7930470108985901,
1683
+ "logps/rejected": -60.068790435791016,
1684
+ "loss": 0.1138,
1685
+ "rewards/accuracies": 0.9624999761581421,
1686
+ "rewards/chosen": 17.401771545410156,
1687
+ "rewards/margins": 6.0026960372924805,
1688
+ "rewards/rejected": 11.399076461791992,
1689
+ "step": 960
1690
+ },
1691
+ {
1692
+ "epoch": 0.8533333333333334,
1693
+ "eval_logits/chosen": 0.001469604205340147,
1694
+ "eval_logits/rejected": -0.009397665038704872,
1695
+ "eval_logps/chosen": -1.8795456886291504,
1696
+ "eval_logps/rejected": -60.17564010620117,
1697
+ "eval_loss": 0.1441129744052887,
1698
+ "eval_rewards/accuracies": 0.9630000591278076,
1699
+ "eval_rewards/chosen": 17.314937591552734,
1700
+ "eval_rewards/margins": 5.9717817306518555,
1701
+ "eval_rewards/rejected": 11.343156814575195,
1702
+ "eval_runtime": 361.5344,
1703
+ "eval_samples_per_second": 2.766,
1704
+ "eval_steps_per_second": 0.346,
1705
+ "step": 960
1706
+ },
1707
+ {
1708
+ "epoch": 0.8622222222222222,
1709
+ "grad_norm": 68.90747833251953,
1710
+ "learning_rate": 2.838681402606952e-07,
1711
+ "logits/chosen": 0.004552370868623257,
1712
+ "logits/rejected": -0.005488495342433453,
1713
+ "logps/chosen": -3.7298974990844727,
1714
+ "logps/rejected": -64.72488403320312,
1715
+ "loss": 0.2425,
1716
+ "rewards/accuracies": 0.925000011920929,
1717
+ "rewards/chosen": 16.832754135131836,
1718
+ "rewards/margins": 5.644216537475586,
1719
+ "rewards/rejected": 11.18853759765625,
1720
+ "step": 970
1721
+ },
1722
+ {
1723
+ "epoch": 0.8711111111111111,
1724
+ "grad_norm": 0.028206102550029755,
1725
+ "learning_rate": 2.490229549367443e-07,
1726
+ "logits/chosen": 0.0025807656347751617,
1727
+ "logits/rejected": -0.008657123893499374,
1728
+ "logps/chosen": -1.1825838088989258,
1729
+ "logps/rejected": -63.57493591308594,
1730
+ "loss": 0.0609,
1731
+ "rewards/accuracies": 0.9750000238418579,
1732
+ "rewards/chosen": 17.227130889892578,
1733
+ "rewards/margins": 6.053717136383057,
1734
+ "rewards/rejected": 11.173412322998047,
1735
+ "step": 980
1736
+ },
1737
+ {
1738
+ "epoch": 0.88,
1739
+ "grad_norm": 0.39742231369018555,
1740
+ "learning_rate": 2.1634684631203412e-07,
1741
+ "logits/chosen": 0.0048486413434147835,
1742
+ "logits/rejected": -0.006056814920157194,
1743
+ "logps/chosen": -2.919680595397949,
1744
+ "logps/rejected": -57.8403205871582,
1745
+ "loss": 0.3464,
1746
+ "rewards/accuracies": 0.8999999761581421,
1747
+ "rewards/chosen": 17.175275802612305,
1748
+ "rewards/margins": 5.559727668762207,
1749
+ "rewards/rejected": 11.615548133850098,
1750
+ "step": 990
1751
+ },
1752
+ {
1753
+ "epoch": 0.8888888888888888,
1754
+ "grad_norm": 30.22509002685547,
1755
+ "learning_rate": 1.8587130159608196e-07,
1756
+ "logits/chosen": 0.0050649940967559814,
1757
+ "logits/rejected": -0.0071399761363863945,
1758
+ "logps/chosen": -0.16131475567817688,
1759
+ "logps/rejected": -66.25190734863281,
1760
+ "loss": 0.0063,
1761
+ "rewards/accuracies": 1.0,
1762
+ "rewards/chosen": 17.313472747802734,
1763
+ "rewards/margins": 6.412895679473877,
1764
+ "rewards/rejected": 10.9005765914917,
1765
+ "step": 1000
1766
+ },
1767
+ {
1768
+ "epoch": 0.8977777777777778,
1769
+ "grad_norm": 53.02751922607422,
1770
+ "learning_rate": 1.5762568750059604e-07,
1771
+ "logits/chosen": 0.005312003195285797,
1772
+ "logits/rejected": -0.0038596936501562595,
1773
+ "logps/chosen": -4.00323486328125,
1774
+ "logps/rejected": -57.656890869140625,
1775
+ "loss": 0.2741,
1776
+ "rewards/accuracies": 0.925000011920929,
1777
+ "rewards/chosen": 17.167781829833984,
1778
+ "rewards/margins": 5.632095813751221,
1779
+ "rewards/rejected": 11.535685539245605,
1780
+ "step": 1010
1781
+ },
1782
+ {
1783
+ "epoch": 0.9066666666666666,
1784
+ "grad_norm": 119.56330108642578,
1785
+ "learning_rate": 1.316372219412454e-07,
1786
+ "logits/chosen": 0.004333779215812683,
1787
+ "logits/rejected": -0.007937717251479626,
1788
+ "logps/chosen": -0.10221245884895325,
1789
+ "logps/rejected": -67.09977722167969,
1790
+ "loss": 0.0513,
1791
+ "rewards/accuracies": 0.9750000238418579,
1792
+ "rewards/chosen": 17.151248931884766,
1793
+ "rewards/margins": 6.162137985229492,
1794
+ "rewards/rejected": 10.98911190032959,
1795
+ "step": 1020
1796
+ },
1797
+ {
1798
+ "epoch": 0.9066666666666666,
1799
+ "eval_logits/chosen": 0.004528110846877098,
1800
+ "eval_logits/rejected": -0.006501312367618084,
1801
+ "eval_logps/chosen": -1.817779302597046,
1802
+ "eval_logps/rejected": -60.34454345703125,
1803
+ "eval_loss": 0.14118175208568573,
1804
+ "eval_rewards/accuracies": 0.9610000848770142,
1805
+ "eval_rewards/chosen": 17.321117401123047,
1806
+ "eval_rewards/margins": 5.994848251342773,
1807
+ "eval_rewards/rejected": 11.32626724243164,
1808
+ "eval_runtime": 361.545,
1809
+ "eval_samples_per_second": 2.766,
1810
+ "eval_steps_per_second": 0.346,
1811
+ "step": 1020
1812
+ },
1813
+ {
1814
+ "epoch": 0.9155555555555556,
1815
+ "grad_norm": 0.9030271768569946,
1816
+ "learning_rate": 1.0793094781005792e-07,
1817
+ "logits/chosen": 0.0036234352737665176,
1818
+ "logits/rejected": -0.008390933275222778,
1819
+ "logps/chosen": -0.09899584949016571,
1820
+ "logps/rejected": -60.80555725097656,
1821
+ "loss": 0.0349,
1822
+ "rewards/accuracies": 0.987500011920929,
1823
+ "rewards/chosen": 17.619266510009766,
1824
+ "rewards/margins": 6.4814043045043945,
1825
+ "rewards/rejected": 11.137863159179688,
1826
+ "step": 1030
1827
+ },
1828
+ {
1829
+ "epoch": 0.9244444444444444,
1830
+ "grad_norm": 157.070068359375,
1831
+ "learning_rate": 8.652970884369255e-08,
1832
+ "logits/chosen": 0.005321115255355835,
1833
+ "logits/rejected": -0.004976513795554638,
1834
+ "logps/chosen": -1.3535833358764648,
1835
+ "logps/rejected": -61.2861328125,
1836
+ "loss": 0.1927,
1837
+ "rewards/accuracies": 0.9500000476837158,
1838
+ "rewards/chosen": 17.209800720214844,
1839
+ "rewards/margins": 5.795952796936035,
1840
+ "rewards/rejected": 11.413846015930176,
1841
+ "step": 1040
1842
+ },
1843
+ {
1844
+ "epoch": 0.9333333333333333,
1845
+ "grad_norm": 24.539953231811523,
1846
+ "learning_rate": 6.745412761086007e-08,
1847
+ "logits/chosen": 0.005845514126121998,
1848
+ "logits/rejected": -0.003921338357031345,
1849
+ "logps/chosen": -1.7223193645477295,
1850
+ "logps/rejected": -58.172142028808594,
1851
+ "loss": 0.1317,
1852
+ "rewards/accuracies": 0.9750000238418579,
1853
+ "rewards/chosen": 17.360435485839844,
1854
+ "rewards/margins": 5.838218688964844,
1855
+ "rewards/rejected": 11.522216796875,
1856
+ "step": 1050
1857
+ },
1858
+ {
1859
+ "epoch": 0.9422222222222222,
1860
+ "grad_norm": 0.061458222568035126,
1861
+ "learning_rate": 5.0722585640090305e-08,
1862
+ "logits/chosen": 0.003759522922337055,
1863
+ "logits/rejected": -0.0068417866714298725,
1864
+ "logps/chosen": -1.351855754852295,
1865
+ "logps/rejected": -59.56566619873047,
1866
+ "loss": 0.2279,
1867
+ "rewards/accuracies": 0.9375,
1868
+ "rewards/chosen": 17.245838165283203,
1869
+ "rewards/margins": 5.7038679122924805,
1870
+ "rewards/rejected": 11.541970252990723,
1871
+ "step": 1060
1872
+ },
1873
+ {
1874
+ "epoch": 0.9511111111111111,
1875
+ "grad_norm": 101.87332916259766,
1876
+ "learning_rate": 3.635120570700784e-08,
1877
+ "logits/chosen": 0.007607857696712017,
1878
+ "logits/rejected": -0.0035320711322128773,
1879
+ "logps/chosen": -1.4004669189453125,
1880
+ "logps/rejected": -63.46752166748047,
1881
+ "loss": 0.1021,
1882
+ "rewards/accuracies": 0.9624999761581421,
1883
+ "rewards/chosen": 17.240036010742188,
1884
+ "rewards/margins": 6.078882694244385,
1885
+ "rewards/rejected": 11.161155700683594,
1886
+ "step": 1070
1887
+ },
1888
+ {
1889
+ "epoch": 0.96,
1890
+ "grad_norm": 1.9229660034179688,
1891
+ "learning_rate": 2.4353836298169343e-08,
1892
+ "logits/chosen": 0.006209026090800762,
1893
+ "logits/rejected": -0.0033816141076385975,
1894
+ "logps/chosen": -2.0281822681427,
1895
+ "logps/rejected": -61.460670471191406,
1896
+ "loss": 0.1189,
1897
+ "rewards/accuracies": 0.9750000238418579,
1898
+ "rewards/chosen": 17.19972801208496,
1899
+ "rewards/margins": 5.881450653076172,
1900
+ "rewards/rejected": 11.318277359008789,
1901
+ "step": 1080
1902
+ },
1903
+ {
1904
+ "epoch": 0.96,
1905
+ "eval_logits/chosen": 0.00737445754930377,
1906
+ "eval_logits/rejected": -0.0035832570865750313,
1907
+ "eval_logps/chosen": -2.1419789791107178,
1908
+ "eval_logps/rejected": -60.60612106323242,
1909
+ "eval_loss": 0.15081512928009033,
1910
+ "eval_rewards/accuracies": 0.9610000848770142,
1911
+ "eval_rewards/chosen": 17.288694381713867,
1912
+ "eval_rewards/margins": 5.98858642578125,
1913
+ "eval_rewards/rejected": 11.300108909606934,
1914
+ "eval_runtime": 361.4165,
1915
+ "eval_samples_per_second": 2.767,
1916
+ "eval_steps_per_second": 0.346,
1917
+ "step": 1080
1918
+ },
1919
+ {
1920
+ "epoch": 0.9688888888888889,
1921
+ "grad_norm": 0.11888863146305084,
1922
+ "learning_rate": 1.4742038266447046e-08,
1923
+ "logits/chosen": 0.006043245084583759,
1924
+ "logits/rejected": -0.004712546244263649,
1925
+ "logps/chosen": -1.4127472639083862,
1926
+ "logps/rejected": -64.81529998779297,
1927
+ "loss": 0.0728,
1928
+ "rewards/accuracies": 0.9750000238418579,
1929
+ "rewards/chosen": 17.181049346923828,
1930
+ "rewards/margins": 6.14100456237793,
1931
+ "rewards/rejected": 11.040044784545898,
1932
+ "step": 1090
1933
+ },
1934
+ {
1935
+ "epoch": 0.9777777777777777,
1936
+ "grad_norm": 2.001819372177124,
1937
+ "learning_rate": 7.525073690809737e-09,
1938
+ "logits/chosen": 0.0055408780463039875,
1939
+ "logits/rejected": -0.005598037503659725,
1940
+ "logps/chosen": -1.017110824584961,
1941
+ "logps/rejected": -60.28044891357422,
1942
+ "loss": 0.0357,
1943
+ "rewards/accuracies": 0.9750000238418579,
1944
+ "rewards/chosen": 17.4679012298584,
1945
+ "rewards/margins": 6.193048477172852,
1946
+ "rewards/rejected": 11.274852752685547,
1947
+ "step": 1100
1948
+ },
1949
+ {
1950
+ "epoch": 0.9866666666666667,
1951
+ "grad_norm": 0.027056939899921417,
1952
+ "learning_rate": 2.709896951238744e-09,
1953
+ "logits/chosen": 0.00636716466397047,
1954
+ "logits/rejected": -0.004360577557235956,
1955
+ "logps/chosen": -2.8028905391693115,
1956
+ "logps/rejected": -60.86518859863281,
1957
+ "loss": 0.1973,
1958
+ "rewards/accuracies": 0.949999988079071,
1959
+ "rewards/chosen": 17.210033416748047,
1960
+ "rewards/margins": 5.936794281005859,
1961
+ "rewards/rejected": 11.273238182067871,
1962
+ "step": 1110
1963
+ },
1964
+ {
1965
+ "epoch": 0.9955555555555555,
1966
+ "grad_norm": 0.15637506544589996,
1967
+ "learning_rate": 3.0114802737818415e-10,
1968
+ "logits/chosen": 0.0038183885626494884,
1969
+ "logits/rejected": -0.007296917960047722,
1970
+ "logps/chosen": -1.1477452516555786,
1971
+ "logps/rejected": -59.37739181518555,
1972
+ "loss": 0.0538,
1973
+ "rewards/accuracies": 0.987500011920929,
1974
+ "rewards/chosen": 17.553251266479492,
1975
+ "rewards/margins": 6.2790141105651855,
1976
+ "rewards/rejected": 11.274236679077148,
1977
+ "step": 1120
1978
+ }
1979
+ ],
1980
+ "logging_steps": 10,
1981
+ "max_steps": 1125,
1982
+ "num_input_tokens_seen": 0,
1983
+ "num_train_epochs": 1,
1984
+ "save_steps": 500,
1985
+ "stateful_callbacks": {
1986
+ "TrainerControl": {
1987
+ "args": {
1988
+ "should_epoch_stop": false,
1989
+ "should_evaluate": false,
1990
+ "should_log": false,
1991
+ "should_save": true,
1992
+ "should_training_stop": true
1993
+ },
1994
+ "attributes": {}
1995
+ }
1996
+ },
1997
+ "total_flos": 1.4338459346927616e+18,
1998
+ "train_batch_size": 4,
1999
+ "trial_name": null,
2000
+ "trial_params": null
2001
+ }
checkpoint-1125/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5286bf380083ef64133f25e161ddfb8549ff44790e432f478753f221ef89e695
3
+ size 5368
checkpoint-500/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
checkpoint-500/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "gate_proj",
25
+ "k_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "down_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
checkpoint-500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2405da5efa3fa58305eb789eb90dbe77348a677c94c781b71f5a7828902865ef
3
+ size 114106856
checkpoint-500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83c427609b64317bf25a71f520f1925991aac70912f290b39b6953fa9c67daa4
3
+ size 228536930
checkpoint-500/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91bd7f619e4cd37883f469c08e90105c4d218fd82ffc43ae58fa9fdbcc37fce5
3
+ size 14512
checkpoint-500/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0a7593f9ab52bf47328c6d50954dce1fcd69866aa6f5f35851aef7f7af3899
3
+ size 14512
checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fd9d73796ba0c0da5d08a8e5aa955713fdd4d633eb58f1fea83f6389e8837c8
3
+ size 1064
checkpoint-500/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
3
+ size 17078292
checkpoint-500/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,911 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.4444444444444444,
5
+ "eval_steps": 60,
6
+ "global_step": 500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.008888888888888889,
13
+ "grad_norm": 8.760091781616211,
14
+ "learning_rate": 4.4247787610619474e-07,
15
+ "logits/chosen": -0.8248252868652344,
16
+ "logits/rejected": -0.8263720273971558,
17
+ "logps/chosen": -0.36086463928222656,
18
+ "logps/rejected": -5.696224689483643,
19
+ "loss": 1.1038,
20
+ "rewards/accuracies": 0.5125000476837158,
21
+ "rewards/chosen": 17.43745994567871,
22
+ "rewards/margins": 0.5984855890274048,
23
+ "rewards/rejected": 16.838973999023438,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.017777777777777778,
28
+ "grad_norm": 8.855981826782227,
29
+ "learning_rate": 8.849557522123895e-07,
30
+ "logits/chosen": -0.8169006109237671,
31
+ "logits/rejected": -0.819770872592926,
32
+ "logps/chosen": -0.12464660406112671,
33
+ "logps/rejected": -7.139842987060547,
34
+ "loss": 1.1887,
35
+ "rewards/accuracies": 0.4000000059604645,
36
+ "rewards/chosen": 17.17649269104004,
37
+ "rewards/margins": 0.19107049703598022,
38
+ "rewards/rejected": 16.98542022705078,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.02666666666666667,
43
+ "grad_norm": 16.764184951782227,
44
+ "learning_rate": 1.3274336283185843e-06,
45
+ "logits/chosen": -0.8003113865852356,
46
+ "logits/rejected": -0.8030117750167847,
47
+ "logps/chosen": -0.34651467204093933,
48
+ "logps/rejected": -6.967917442321777,
49
+ "loss": 1.0563,
50
+ "rewards/accuracies": 0.44999998807907104,
51
+ "rewards/chosen": 17.280975341796875,
52
+ "rewards/margins": 0.40005987882614136,
53
+ "rewards/rejected": 16.88091468811035,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.035555555555555556,
58
+ "grad_norm": 8.33682918548584,
59
+ "learning_rate": 1.769911504424779e-06,
60
+ "logits/chosen": -0.7695047855377197,
61
+ "logits/rejected": -0.7739207148551941,
62
+ "logps/chosen": -1.5993006229400635,
63
+ "logps/rejected": -8.504932403564453,
64
+ "loss": 0.7596,
65
+ "rewards/accuracies": 0.5,
66
+ "rewards/chosen": 17.283912658691406,
67
+ "rewards/margins": 0.6976072192192078,
68
+ "rewards/rejected": 16.5863037109375,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.044444444444444446,
73
+ "grad_norm": 4.494723320007324,
74
+ "learning_rate": 2.212389380530974e-06,
75
+ "logits/chosen": -0.7154140472412109,
76
+ "logits/rejected": -0.7225576043128967,
77
+ "logps/chosen": -3.112199068069458,
78
+ "logps/rejected": -12.212080001831055,
79
+ "loss": 0.6083,
80
+ "rewards/accuracies": 0.4625000059604645,
81
+ "rewards/chosen": 17.03064727783203,
82
+ "rewards/margins": 0.7148451805114746,
83
+ "rewards/rejected": 16.3158016204834,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.05333333333333334,
88
+ "grad_norm": 5.110287666320801,
89
+ "learning_rate": 2.6548672566371687e-06,
90
+ "logits/chosen": -0.6322453022003174,
91
+ "logits/rejected": -0.6387485265731812,
92
+ "logps/chosen": -5.650620460510254,
93
+ "logps/rejected": -12.759811401367188,
94
+ "loss": 0.3835,
95
+ "rewards/accuracies": 0.9125000238418579,
96
+ "rewards/chosen": 17.101289749145508,
97
+ "rewards/margins": 1.1824612617492676,
98
+ "rewards/rejected": 15.918828964233398,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.05333333333333334,
103
+ "eval_logits/chosen": -0.5826543569564819,
104
+ "eval_logits/rejected": -0.5914276838302612,
105
+ "eval_logps/chosen": -3.5471787452697754,
106
+ "eval_logps/rejected": -16.51181983947754,
107
+ "eval_loss": 0.3286525011062622,
108
+ "eval_rewards/accuracies": 0.9280000925064087,
109
+ "eval_rewards/chosen": 17.148174285888672,
110
+ "eval_rewards/margins": 1.4386365413665771,
111
+ "eval_rewards/rejected": 15.709539413452148,
112
+ "eval_runtime": 372.0227,
113
+ "eval_samples_per_second": 2.688,
114
+ "eval_steps_per_second": 0.336,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.06222222222222222,
119
+ "grad_norm": 5.098133563995361,
120
+ "learning_rate": 3.097345132743363e-06,
121
+ "logits/chosen": -0.5378152132034302,
122
+ "logits/rejected": -0.5494933724403381,
123
+ "logps/chosen": -1.5099802017211914,
124
+ "logps/rejected": -21.206321716308594,
125
+ "loss": 0.2931,
126
+ "rewards/accuracies": 0.9375,
127
+ "rewards/chosen": 17.083791732788086,
128
+ "rewards/margins": 1.5844331979751587,
129
+ "rewards/rejected": 15.499359130859375,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.07111111111111111,
134
+ "grad_norm": 29.787437438964844,
135
+ "learning_rate": 3.539823008849558e-06,
136
+ "logits/chosen": -0.443774938583374,
137
+ "logits/rejected": -0.45571577548980713,
138
+ "logps/chosen": -1.5804342031478882,
139
+ "logps/rejected": -22.606929779052734,
140
+ "loss": 0.202,
141
+ "rewards/accuracies": 0.9750000238418579,
142
+ "rewards/chosen": 17.302125930786133,
143
+ "rewards/margins": 2.174014091491699,
144
+ "rewards/rejected": 15.128110885620117,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 0.08,
149
+ "grad_norm": 23.14398193359375,
150
+ "learning_rate": 3.982300884955752e-06,
151
+ "logits/chosen": -0.3626072406768799,
152
+ "logits/rejected": -0.3787815570831299,
153
+ "logps/chosen": -2.203828811645508,
154
+ "logps/rejected": -29.433551788330078,
155
+ "loss": 0.2123,
156
+ "rewards/accuracies": 0.925000011920929,
157
+ "rewards/chosen": 17.00284194946289,
158
+ "rewards/margins": 2.320391893386841,
159
+ "rewards/rejected": 14.682450294494629,
160
+ "step": 90
161
+ },
162
+ {
163
+ "epoch": 0.08888888888888889,
164
+ "grad_norm": 29.672739028930664,
165
+ "learning_rate": 4.424778761061948e-06,
166
+ "logits/chosen": -0.3035663962364197,
167
+ "logits/rejected": -0.31762221455574036,
168
+ "logps/chosen": -3.433589458465576,
169
+ "logps/rejected": -29.9322509765625,
170
+ "loss": 0.2592,
171
+ "rewards/accuracies": 0.9375,
172
+ "rewards/chosen": 16.929956436157227,
173
+ "rewards/margins": 2.31272029876709,
174
+ "rewards/rejected": 14.617237091064453,
175
+ "step": 100
176
+ },
177
+ {
178
+ "epoch": 0.09777777777777778,
179
+ "grad_norm": 1.873722791671753,
180
+ "learning_rate": 4.867256637168142e-06,
181
+ "logits/chosen": -0.2679600715637207,
182
+ "logits/rejected": -0.2826440930366516,
183
+ "logps/chosen": -0.9653514623641968,
184
+ "logps/rejected": -30.235322952270508,
185
+ "loss": 0.1336,
186
+ "rewards/accuracies": 0.949999988079071,
187
+ "rewards/chosen": 17.462385177612305,
188
+ "rewards/margins": 3.1994175910949707,
189
+ "rewards/rejected": 14.26296615600586,
190
+ "step": 110
191
+ },
192
+ {
193
+ "epoch": 0.10666666666666667,
194
+ "grad_norm": 1.6913721561431885,
195
+ "learning_rate": 4.999409761242696e-06,
196
+ "logits/chosen": -0.22222033143043518,
197
+ "logits/rejected": -0.23720571398735046,
198
+ "logps/chosen": -4.4953508377075195,
199
+ "logps/rejected": -34.074745178222656,
200
+ "loss": 0.2552,
201
+ "rewards/accuracies": 0.8999999761581421,
202
+ "rewards/chosen": 17.04866600036621,
203
+ "rewards/margins": 3.1014418601989746,
204
+ "rewards/rejected": 13.947224617004395,
205
+ "step": 120
206
+ },
207
+ {
208
+ "epoch": 0.10666666666666667,
209
+ "eval_logits/chosen": -0.206527978181839,
210
+ "eval_logits/rejected": -0.22178640961647034,
211
+ "eval_logps/chosen": -3.69442081451416,
212
+ "eval_logps/rejected": -36.072166442871094,
213
+ "eval_loss": 0.18996010720729828,
214
+ "eval_rewards/accuracies": 0.9320000410079956,
215
+ "eval_rewards/chosen": 17.133451461791992,
216
+ "eval_rewards/margins": 3.379946708679199,
217
+ "eval_rewards/rejected": 13.753504753112793,
218
+ "eval_runtime": 361.5279,
219
+ "eval_samples_per_second": 2.766,
220
+ "eval_steps_per_second": 0.346,
221
+ "step": 120
222
+ },
223
+ {
224
+ "epoch": 0.11555555555555555,
225
+ "grad_norm": 61.80262756347656,
226
+ "learning_rate": 4.996519466816778e-06,
227
+ "logits/chosen": -0.18473535776138306,
228
+ "logits/rejected": -0.1988501250743866,
229
+ "logps/chosen": -3.7009687423706055,
230
+ "logps/rejected": -39.289939880371094,
231
+ "loss": 0.1394,
232
+ "rewards/accuracies": 0.9624999761581421,
233
+ "rewards/chosen": 17.106964111328125,
234
+ "rewards/margins": 3.633338212966919,
235
+ "rewards/rejected": 13.473625183105469,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 0.12444444444444444,
240
+ "grad_norm": 1.6732702255249023,
241
+ "learning_rate": 4.9912234871722805e-06,
242
+ "logits/chosen": -0.16134041547775269,
243
+ "logits/rejected": -0.17547868192195892,
244
+ "logps/chosen": -3.0637736320495605,
245
+ "logps/rejected": -40.07548522949219,
246
+ "loss": 0.1408,
247
+ "rewards/accuracies": 0.9750000238418579,
248
+ "rewards/chosen": 17.392223358154297,
249
+ "rewards/margins": 4.242353439331055,
250
+ "rewards/rejected": 13.149867057800293,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.13333333333333333,
255
+ "grad_norm": 0.346453994512558,
256
+ "learning_rate": 4.98352692559805e-06,
257
+ "logits/chosen": -0.13797929883003235,
258
+ "logits/rejected": -0.15283086895942688,
259
+ "logps/chosen": -5.14492130279541,
260
+ "logps/rejected": -47.97212219238281,
261
+ "loss": 0.2153,
262
+ "rewards/accuracies": 0.9375,
263
+ "rewards/chosen": 16.896778106689453,
264
+ "rewards/margins": 4.227695465087891,
265
+ "rewards/rejected": 12.669081687927246,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.14222222222222222,
270
+ "grad_norm": 0.21871662139892578,
271
+ "learning_rate": 4.973437198621237e-06,
272
+ "logits/chosen": -0.12396670132875443,
273
+ "logits/rejected": -0.13780555129051208,
274
+ "logps/chosen": -6.108860015869141,
275
+ "logps/rejected": -54.90739440917969,
276
+ "loss": 0.0388,
277
+ "rewards/accuracies": 0.9750000238418579,
278
+ "rewards/chosen": 16.75935935974121,
279
+ "rewards/margins": 4.755282878875732,
280
+ "rewards/rejected": 12.004077911376953,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 0.1511111111111111,
285
+ "grad_norm": 235.12429809570312,
286
+ "learning_rate": 4.960964028860621e-06,
287
+ "logits/chosen": -0.1140839159488678,
288
+ "logits/rejected": -0.1263057291507721,
289
+ "logps/chosen": -12.605452537536621,
290
+ "logps/rejected": -53.81230926513672,
291
+ "loss": 0.4651,
292
+ "rewards/accuracies": 0.875,
293
+ "rewards/chosen": 16.101238250732422,
294
+ "rewards/margins": 3.9864249229431152,
295
+ "rewards/rejected": 12.114812850952148,
296
+ "step": 170
297
+ },
298
+ {
299
+ "epoch": 0.16,
300
+ "grad_norm": 190.97048950195312,
301
+ "learning_rate": 4.946119435657738e-06,
302
+ "logits/chosen": -0.10746976733207703,
303
+ "logits/rejected": -0.11878640949726105,
304
+ "logps/chosen": -8.5105562210083,
305
+ "logps/rejected": -51.314781188964844,
306
+ "loss": 0.2362,
307
+ "rewards/accuracies": 0.925000011920929,
308
+ "rewards/chosen": 16.719980239868164,
309
+ "rewards/margins": 4.549674034118652,
310
+ "rewards/rejected": 12.170306205749512,
311
+ "step": 180
312
+ },
313
+ {
314
+ "epoch": 0.16,
315
+ "eval_logits/chosen": -0.10870806127786636,
316
+ "eval_logits/rejected": -0.12223993986845016,
317
+ "eval_logps/chosen": -4.414996147155762,
318
+ "eval_logps/rejected": -53.885032653808594,
319
+ "eval_loss": 0.20236633718013763,
320
+ "eval_rewards/accuracies": 0.9510000944137573,
321
+ "eval_rewards/chosen": 17.06139373779297,
322
+ "eval_rewards/margins": 5.089176177978516,
323
+ "eval_rewards/rejected": 11.97221851348877,
324
+ "eval_runtime": 361.4355,
325
+ "eval_samples_per_second": 2.767,
326
+ "eval_steps_per_second": 0.346,
327
+ "step": 180
328
+ },
329
+ {
330
+ "epoch": 0.1688888888888889,
331
+ "grad_norm": 56.81266784667969,
332
+ "learning_rate": 4.928917723494854e-06,
333
+ "logits/chosen": -0.10682469606399536,
334
+ "logits/rejected": -0.12124393880367279,
335
+ "logps/chosen": -3.058413028717041,
336
+ "logps/rejected": -55.052528381347656,
337
+ "loss": 0.2442,
338
+ "rewards/accuracies": 0.9500000476837158,
339
+ "rewards/chosen": 17.058589935302734,
340
+ "rewards/margins": 5.056097984313965,
341
+ "rewards/rejected": 12.002490043640137,
342
+ "step": 190
343
+ },
344
+ {
345
+ "epoch": 0.17777777777777778,
346
+ "grad_norm": 175.06552124023438,
347
+ "learning_rate": 4.909375468210947e-06,
348
+ "logits/chosen": -0.10520349442958832,
349
+ "logits/rejected": -0.12018950283527374,
350
+ "logps/chosen": -4.114959716796875,
351
+ "logps/rejected": -55.9394645690918,
352
+ "loss": 0.1915,
353
+ "rewards/accuracies": 0.9500000476837158,
354
+ "rewards/chosen": 16.98603057861328,
355
+ "rewards/margins": 5.105838775634766,
356
+ "rewards/rejected": 11.880191802978516,
357
+ "step": 200
358
+ },
359
+ {
360
+ "epoch": 0.18666666666666668,
361
+ "grad_norm": 78.06558990478516,
362
+ "learning_rate": 4.8875115010289655e-06,
363
+ "logits/chosen": -0.10475558042526245,
364
+ "logits/rejected": -0.11949175596237183,
365
+ "logps/chosen": -6.760301113128662,
366
+ "logps/rejected": -53.91607666015625,
367
+ "loss": 0.2843,
368
+ "rewards/accuracies": 0.9375,
369
+ "rewards/chosen": 16.857545852661133,
370
+ "rewards/margins": 4.917357921600342,
371
+ "rewards/rejected": 11.94018840789795,
372
+ "step": 210
373
+ },
374
+ {
375
+ "epoch": 0.19555555555555557,
376
+ "grad_norm": 15.880486488342285,
377
+ "learning_rate": 4.863346890409768e-06,
378
+ "logits/chosen": -0.11213523149490356,
379
+ "logits/rejected": -0.12581588327884674,
380
+ "logps/chosen": -6.759585380554199,
381
+ "logps/rejected": -51.10936737060547,
382
+ "loss": 0.5104,
383
+ "rewards/accuracies": 0.875,
384
+ "rewards/chosen": 16.859071731567383,
385
+ "rewards/margins": 4.638372898101807,
386
+ "rewards/rejected": 12.220698356628418,
387
+ "step": 220
388
+ },
389
+ {
390
+ "epoch": 0.20444444444444446,
391
+ "grad_norm": 46.97845458984375,
392
+ "learning_rate": 4.836904921750224e-06,
393
+ "logits/chosen": -0.11947059631347656,
394
+ "logits/rejected": -0.1329912692308426,
395
+ "logps/chosen": -3.608184814453125,
396
+ "logps/rejected": -48.794761657714844,
397
+ "loss": 0.2134,
398
+ "rewards/accuracies": 0.925000011920929,
399
+ "rewards/chosen": 17.235904693603516,
400
+ "rewards/margins": 4.859888076782227,
401
+ "rewards/rejected": 12.376014709472656,
402
+ "step": 230
403
+ },
404
+ {
405
+ "epoch": 0.21333333333333335,
406
+ "grad_norm": 24.032859802246094,
407
+ "learning_rate": 4.808211074945042e-06,
408
+ "logits/chosen": -0.1200513243675232,
409
+ "logits/rejected": -0.1333036869764328,
410
+ "logps/chosen": -3.7552154064178467,
411
+ "logps/rejected": -49.87453079223633,
412
+ "loss": 0.1781,
413
+ "rewards/accuracies": 0.9500000476837158,
414
+ "rewards/chosen": 17.094650268554688,
415
+ "rewards/margins": 4.68077278137207,
416
+ "rewards/rejected": 12.41387939453125,
417
+ "step": 240
418
+ },
419
+ {
420
+ "epoch": 0.21333333333333335,
421
+ "eval_logits/chosen": -0.12433278560638428,
422
+ "eval_logits/rejected": -0.13808581233024597,
423
+ "eval_logps/chosen": -4.408891201019287,
424
+ "eval_logps/rejected": -50.744781494140625,
425
+ "eval_loss": 0.1546352356672287,
426
+ "eval_rewards/accuracies": 0.9500000476837158,
427
+ "eval_rewards/chosen": 17.06200408935547,
428
+ "eval_rewards/margins": 4.775761604309082,
429
+ "eval_rewards/rejected": 12.286243438720703,
430
+ "eval_runtime": 361.4974,
431
+ "eval_samples_per_second": 2.766,
432
+ "eval_steps_per_second": 0.346,
433
+ "step": 240
434
+ },
435
+ {
436
+ "epoch": 0.2222222222222222,
437
+ "grad_norm": 0.25737640261650085,
438
+ "learning_rate": 4.7772929998339485e-06,
439
+ "logits/chosen": -0.12348780035972595,
440
+ "logits/rejected": -0.13704943656921387,
441
+ "logps/chosen": -4.4299187660217285,
442
+ "logps/rejected": -53.074607849121094,
443
+ "loss": 0.1373,
444
+ "rewards/accuracies": 0.9375,
445
+ "rewards/chosen": 17.087068557739258,
446
+ "rewards/margins": 5.06691837310791,
447
+ "rewards/rejected": 12.020149230957031,
448
+ "step": 250
449
+ },
450
+ {
451
+ "epoch": 0.2311111111111111,
452
+ "grad_norm": 0.1839389204978943,
453
+ "learning_rate": 4.744180489557859e-06,
454
+ "logits/chosen": -0.12177034467458725,
455
+ "logits/rejected": -0.1342695653438568,
456
+ "logps/chosen": -3.775188446044922,
457
+ "logps/rejected": -53.98720932006836,
458
+ "loss": 0.1896,
459
+ "rewards/accuracies": 0.949999988079071,
460
+ "rewards/chosen": 17.12021255493164,
461
+ "rewards/margins": 5.148064613342285,
462
+ "rewards/rejected": 11.972146987915039,
463
+ "step": 260
464
+ },
465
+ {
466
+ "epoch": 0.24,
467
+ "grad_norm": 12.258485794067383,
468
+ "learning_rate": 4.708905451849754e-06,
469
+ "logits/chosen": -0.11067859083414078,
470
+ "logits/rejected": -0.12377731502056122,
471
+ "logps/chosen": -6.418317794799805,
472
+ "logps/rejected": -56.57402801513672,
473
+ "loss": 0.2315,
474
+ "rewards/accuracies": 0.9375,
475
+ "rewards/chosen": 16.738832473754883,
476
+ "rewards/margins": 4.884931564331055,
477
+ "rewards/rejected": 11.853900909423828,
478
+ "step": 270
479
+ },
480
+ {
481
+ "epoch": 0.24888888888888888,
482
+ "grad_norm": 77.56194305419922,
483
+ "learning_rate": 4.671501878287879e-06,
484
+ "logits/chosen": -0.1184445172548294,
485
+ "logits/rejected": -0.1339874565601349,
486
+ "logps/chosen": -10.12116527557373,
487
+ "logps/rejected": -53.403907775878906,
488
+ "loss": 0.5343,
489
+ "rewards/accuracies": 0.862500011920929,
490
+ "rewards/chosen": 16.458633422851562,
491
+ "rewards/margins": 4.402472496032715,
492
+ "rewards/rejected": 12.056160926818848,
493
+ "step": 280
494
+ },
495
+ {
496
+ "epoch": 0.2577777777777778,
497
+ "grad_norm": 67.53883361816406,
498
+ "learning_rate": 4.6320058115409295e-06,
499
+ "logits/chosen": -0.1448262631893158,
500
+ "logits/rejected": -0.15793387591838837,
501
+ "logps/chosen": -3.4666190147399902,
502
+ "logps/rejected": -48.79213333129883,
503
+ "loss": 0.5017,
504
+ "rewards/accuracies": 0.887499988079071,
505
+ "rewards/chosen": 16.945899963378906,
506
+ "rewards/margins": 4.2686333656311035,
507
+ "rewards/rejected": 12.677268028259277,
508
+ "step": 290
509
+ },
510
+ {
511
+ "epoch": 0.26666666666666666,
512
+ "grad_norm": 0.17521341145038605,
513
+ "learning_rate": 4.590455310636778e-06,
514
+ "logits/chosen": -0.16128253936767578,
515
+ "logits/rejected": -0.17375555634498596,
516
+ "logps/chosen": -2.9032950401306152,
517
+ "logps/rejected": -47.69734191894531,
518
+ "loss": 0.265,
519
+ "rewards/accuracies": 0.925000011920929,
520
+ "rewards/chosen": 17.18383026123047,
521
+ "rewards/margins": 4.541309356689453,
522
+ "rewards/rejected": 12.642518997192383,
523
+ "step": 300
524
+ },
525
+ {
526
+ "epoch": 0.26666666666666666,
527
+ "eval_logits/chosen": -0.17444846034049988,
528
+ "eval_logits/rejected": -0.18559777736663818,
529
+ "eval_logps/chosen": -2.535512924194336,
530
+ "eval_logps/rejected": -47.16367721557617,
531
+ "eval_loss": 0.15360687673091888,
532
+ "eval_rewards/accuracies": 0.9440000653266907,
533
+ "eval_rewards/chosen": 17.249343872070312,
534
+ "eval_rewards/margins": 4.604989051818848,
535
+ "eval_rewards/rejected": 12.644353866577148,
536
+ "eval_runtime": 361.4575,
537
+ "eval_samples_per_second": 2.767,
538
+ "eval_steps_per_second": 0.346,
539
+ "step": 300
540
+ },
541
+ {
542
+ "epoch": 0.27555555555555555,
543
+ "grad_norm": 0.5040452480316162,
544
+ "learning_rate": 4.54689041428819e-06,
545
+ "logits/chosen": -0.16974106431007385,
546
+ "logits/rejected": -0.1810058057308197,
547
+ "logps/chosen": -1.233938217163086,
548
+ "logps/rejected": -49.907745361328125,
549
+ "loss": 0.1132,
550
+ "rewards/accuracies": 0.9500000476837158,
551
+ "rewards/chosen": 17.34117889404297,
552
+ "rewards/margins": 4.934173583984375,
553
+ "rewards/rejected": 12.407005310058594,
554
+ "step": 310
555
+ },
556
+ {
557
+ "epoch": 0.28444444444444444,
558
+ "grad_norm": 100.02949523925781,
559
+ "learning_rate": 4.501353102310901e-06,
560
+ "logits/chosen": -0.15705889463424683,
561
+ "logits/rejected": -0.1695334017276764,
562
+ "logps/chosen": -1.0820492506027222,
563
+ "logps/rejected": -52.577110290527344,
564
+ "loss": 0.1194,
565
+ "rewards/accuracies": 0.9500000476837158,
566
+ "rewards/chosen": 17.33388900756836,
567
+ "rewards/margins": 5.154760837554932,
568
+ "rewards/rejected": 12.179126739501953,
569
+ "step": 320
570
+ },
571
+ {
572
+ "epoch": 0.29333333333333333,
573
+ "grad_norm": 0.2689219117164612,
574
+ "learning_rate": 4.453887255171206e-06,
575
+ "logits/chosen": -0.13849371671676636,
576
+ "logits/rejected": -0.14990833401679993,
577
+ "logps/chosen": -1.8435032367706299,
578
+ "logps/rejected": -54.79044723510742,
579
+ "loss": 0.0926,
580
+ "rewards/accuracies": 0.9500000476837158,
581
+ "rewards/chosen": 17.2423095703125,
582
+ "rewards/margins": 5.28987979888916,
583
+ "rewards/rejected": 11.952428817749023,
584
+ "step": 330
585
+ },
586
+ {
587
+ "epoch": 0.3022222222222222,
588
+ "grad_norm": 0.09305431693792343,
589
+ "learning_rate": 4.404538611702055e-06,
590
+ "logits/chosen": -0.12299702316522598,
591
+ "logits/rejected": -0.13453055918216705,
592
+ "logps/chosen": -2.9897143840789795,
593
+ "logps/rejected": -52.954498291015625,
594
+ "loss": 0.2873,
595
+ "rewards/accuracies": 0.925000011920929,
596
+ "rewards/chosen": 17.17474365234375,
597
+ "rewards/margins": 5.071004867553711,
598
+ "rewards/rejected": 12.103739738464355,
599
+ "step": 340
600
+ },
601
+ {
602
+ "epoch": 0.3111111111111111,
603
+ "grad_norm": 59.282073974609375,
604
+ "learning_rate": 4.3533547250284015e-06,
605
+ "logits/chosen": -0.11913029849529266,
606
+ "logits/rejected": -0.12785324454307556,
607
+ "logps/chosen": -3.9456872940063477,
608
+ "logps/rejected": -48.68487548828125,
609
+ "loss": 0.4332,
610
+ "rewards/accuracies": 0.875,
611
+ "rewards/chosen": 17.12805938720703,
612
+ "rewards/margins": 4.669450283050537,
613
+ "rewards/rejected": 12.458610534667969,
614
+ "step": 350
615
+ },
616
+ {
617
+ "epoch": 0.32,
618
+ "grad_norm": 0.31101909279823303,
619
+ "learning_rate": 4.300384916744261e-06,
620
+ "logits/chosen": -0.11280188709497452,
621
+ "logits/rejected": -0.12300585210323334,
622
+ "logps/chosen": -2.1714723110198975,
623
+ "logps/rejected": -54.74174118041992,
624
+ "loss": 0.1605,
625
+ "rewards/accuracies": 0.9500000476837158,
626
+ "rewards/chosen": 17.326162338256836,
627
+ "rewards/margins": 5.467062473297119,
628
+ "rewards/rejected": 11.859098434448242,
629
+ "step": 360
630
+ },
631
+ {
632
+ "epoch": 0.32,
633
+ "eval_logits/chosen": -0.10620756447315216,
634
+ "eval_logits/rejected": -0.11727114766836166,
635
+ "eval_logps/chosen": -1.4165427684783936,
636
+ "eval_logps/rejected": -50.9525146484375,
637
+ "eval_loss": 0.3194349706172943,
638
+ "eval_rewards/accuracies": 0.9210000038146973,
639
+ "eval_rewards/chosen": 17.36124038696289,
640
+ "eval_rewards/margins": 5.095769882202148,
641
+ "eval_rewards/rejected": 12.26546859741211,
642
+ "eval_runtime": 361.5072,
643
+ "eval_samples_per_second": 2.766,
644
+ "eval_steps_per_second": 0.346,
645
+ "step": 360
646
+ },
647
+ {
648
+ "epoch": 0.3288888888888889,
649
+ "grad_norm": 6.1126532554626465,
650
+ "learning_rate": 4.24568022938566e-06,
651
+ "logits/chosen": -0.10354311764240265,
652
+ "logits/rejected": -0.11526636779308319,
653
+ "logps/chosen": -1.2935255765914917,
654
+ "logps/rejected": -55.57566833496094,
655
+ "loss": 0.1711,
656
+ "rewards/accuracies": 0.9500000476837158,
657
+ "rewards/chosen": 17.439346313476562,
658
+ "rewards/margins": 5.700921058654785,
659
+ "rewards/rejected": 11.738424301147461,
660
+ "step": 370
661
+ },
662
+ {
663
+ "epoch": 0.3377777777777778,
664
+ "grad_norm": 34.15927505493164,
665
+ "learning_rate": 4.189293377245241e-06,
666
+ "logits/chosen": -0.1029932051897049,
667
+ "logits/rejected": -0.11382515728473663,
668
+ "logps/chosen": -2.5132687091827393,
669
+ "logps/rejected": -55.50346374511719,
670
+ "loss": 0.4359,
671
+ "rewards/accuracies": 0.8875000476837158,
672
+ "rewards/chosen": 16.731037139892578,
673
+ "rewards/margins": 4.368172645568848,
674
+ "rewards/rejected": 12.362865447998047,
675
+ "step": 380
676
+ },
677
+ {
678
+ "epoch": 0.3466666666666667,
679
+ "grad_norm": 2.8422904014587402,
680
+ "learning_rate": 4.131278695575952e-06,
681
+ "logits/chosen": -0.10793520510196686,
682
+ "logits/rejected": -0.12109285593032837,
683
+ "logps/chosen": -3.014652729034424,
684
+ "logps/rejected": -53.98411560058594,
685
+ "loss": 0.2161,
686
+ "rewards/accuracies": 0.949999988079071,
687
+ "rewards/chosen": 17.137393951416016,
688
+ "rewards/margins": 5.105995178222656,
689
+ "rewards/rejected": 12.03139877319336,
690
+ "step": 390
691
+ },
692
+ {
693
+ "epoch": 0.35555555555555557,
694
+ "grad_norm": 54.0329475402832,
695
+ "learning_rate": 4.071692088232743e-06,
696
+ "logits/chosen": -0.10393750667572021,
697
+ "logits/rejected": -0.11834606528282166,
698
+ "logps/chosen": -2.1508543491363525,
699
+ "logps/rejected": -45.60733413696289,
700
+ "loss": 0.2077,
701
+ "rewards/accuracies": 0.925000011920929,
702
+ "rewards/chosen": 17.586124420166016,
703
+ "rewards/margins": 5.077212333679199,
704
+ "rewards/rejected": 12.5089111328125,
705
+ "step": 400
706
+ },
707
+ {
708
+ "epoch": 0.36444444444444446,
709
+ "grad_norm": 81.61144256591797,
710
+ "learning_rate": 4.010590973802737e-06,
711
+ "logits/chosen": -0.09564584493637085,
712
+ "logits/rejected": -0.10617707669734955,
713
+ "logps/chosen": -3.4572842121124268,
714
+ "logps/rejected": -50.92162322998047,
715
+ "loss": 0.2478,
716
+ "rewards/accuracies": 0.8875000476837158,
717
+ "rewards/chosen": 17.010910034179688,
718
+ "rewards/margins": 4.556198596954346,
719
+ "rewards/rejected": 12.454713821411133,
720
+ "step": 410
721
+ },
722
+ {
723
+ "epoch": 0.37333333333333335,
724
+ "grad_norm": 0.30974289774894714,
725
+ "learning_rate": 3.948034230275781e-06,
726
+ "logits/chosen": -0.09134417027235031,
727
+ "logits/rejected": -0.1020016297698021,
728
+ "logps/chosen": -5.046698570251465,
729
+ "logps/rejected": -48.908958435058594,
730
+ "loss": 0.2894,
731
+ "rewards/accuracies": 0.8999999761581421,
732
+ "rewards/chosen": 17.007888793945312,
733
+ "rewards/margins": 4.53641414642334,
734
+ "rewards/rejected": 12.471475601196289,
735
+ "step": 420
736
+ },
737
+ {
738
+ "epoch": 0.37333333333333335,
739
+ "eval_logits/chosen": -0.09054450690746307,
740
+ "eval_logits/rejected": -0.10264354199171066,
741
+ "eval_logps/chosen": -1.913105845451355,
742
+ "eval_logps/rejected": -51.11127471923828,
743
+ "eval_loss": 0.16789735853672028,
744
+ "eval_rewards/accuracies": 0.9450000524520874,
745
+ "eval_rewards/chosen": 17.311582565307617,
746
+ "eval_rewards/margins": 5.061989784240723,
747
+ "eval_rewards/rejected": 12.249593734741211,
748
+ "eval_runtime": 361.5337,
749
+ "eval_samples_per_second": 2.766,
750
+ "eval_steps_per_second": 0.346,
751
+ "step": 420
752
+ },
753
+ {
754
+ "epoch": 0.38222222222222224,
755
+ "grad_norm": 12.824393272399902,
756
+ "learning_rate": 3.884082138308699e-06,
757
+ "logits/chosen": -0.08666776865720749,
758
+ "logits/rejected": -0.0997733399271965,
759
+ "logps/chosen": -1.7306327819824219,
760
+ "logps/rejected": -54.273292541503906,
761
+ "loss": 0.2298,
762
+ "rewards/accuracies": 0.9500000476837158,
763
+ "rewards/chosen": 17.167621612548828,
764
+ "rewards/margins": 5.065673351287842,
765
+ "rewards/rejected": 12.101947784423828,
766
+ "step": 430
767
+ },
768
+ {
769
+ "epoch": 0.39111111111111113,
770
+ "grad_norm": 0.30713599920272827,
771
+ "learning_rate": 3.818796323137896e-06,
772
+ "logits/chosen": -0.09174907952547073,
773
+ "logits/rejected": -0.10376611351966858,
774
+ "logps/chosen": -1.489154577255249,
775
+ "logps/rejected": -54.580726623535156,
776
+ "loss": 0.2513,
777
+ "rewards/accuracies": 0.9375,
778
+ "rewards/chosen": 17.22280502319336,
779
+ "rewards/margins": 5.175349235534668,
780
+ "rewards/rejected": 12.047454833984375,
781
+ "step": 440
782
+ },
783
+ {
784
+ "epoch": 0.4,
785
+ "grad_norm": 87.4791488647461,
786
+ "learning_rate": 3.7522396951963303e-06,
787
+ "logits/chosen": -0.09688778221607208,
788
+ "logits/rejected": -0.10897806286811829,
789
+ "logps/chosen": -3.157695770263672,
790
+ "logps/rejected": -50.96417236328125,
791
+ "loss": 0.1758,
792
+ "rewards/accuracies": 0.9500000476837158,
793
+ "rewards/chosen": 17.345651626586914,
794
+ "rewards/margins": 5.245656967163086,
795
+ "rewards/rejected": 12.099993705749512,
796
+ "step": 450
797
+ },
798
+ {
799
+ "epoch": 0.4088888888888889,
800
+ "grad_norm": 146.2008056640625,
801
+ "learning_rate": 3.684476389492026e-06,
802
+ "logits/chosen": -0.09378582239151001,
803
+ "logits/rejected": -0.10475654900074005,
804
+ "logps/chosen": -0.5611928701400757,
805
+ "logps/rejected": -56.518890380859375,
806
+ "loss": 0.1981,
807
+ "rewards/accuracies": 0.9500000476837158,
808
+ "rewards/chosen": 17.113712310791016,
809
+ "rewards/margins": 5.068872928619385,
810
+ "rewards/rejected": 12.044839859008789,
811
+ "step": 460
812
+ },
813
+ {
814
+ "epoch": 0.4177777777777778,
815
+ "grad_norm": 1.9137721061706543,
816
+ "learning_rate": 3.6155717038065783e-06,
817
+ "logits/chosen": -0.08695463836193085,
818
+ "logits/rejected": -0.09596743434667587,
819
+ "logps/chosen": -1.5298550128936768,
820
+ "logps/rejected": -50.27445983886719,
821
+ "loss": 0.2066,
822
+ "rewards/accuracies": 0.9375,
823
+ "rewards/chosen": 17.35186004638672,
824
+ "rewards/margins": 5.014693260192871,
825
+ "rewards/rejected": 12.337167739868164,
826
+ "step": 470
827
+ },
828
+ {
829
+ "epoch": 0.4266666666666667,
830
+ "grad_norm": 84.80391693115234,
831
+ "learning_rate": 3.545592035773192e-06,
832
+ "logits/chosen": -0.0746893435716629,
833
+ "logits/rejected": -0.08653923869132996,
834
+ "logps/chosen": -2.0052125453948975,
835
+ "logps/rejected": -57.502811431884766,
836
+ "loss": 0.1149,
837
+ "rewards/accuracies": 0.9500000476837158,
838
+ "rewards/chosen": 17.14373016357422,
839
+ "rewards/margins": 5.360415935516357,
840
+ "rewards/rejected": 11.783313751220703,
841
+ "step": 480
842
+ },
843
+ {
844
+ "epoch": 0.4266666666666667,
845
+ "eval_logits/chosen": -0.07700399309396744,
846
+ "eval_logits/rejected": -0.08828537166118622,
847
+ "eval_logps/chosen": -4.48896598815918,
848
+ "eval_logps/rejected": -53.76282501220703,
849
+ "eval_loss": 0.29511645436286926,
850
+ "eval_rewards/accuracies": 0.9230000376701355,
851
+ "eval_rewards/chosen": 17.053997039794922,
852
+ "eval_rewards/margins": 5.069558143615723,
853
+ "eval_rewards/rejected": 11.984437942504883,
854
+ "eval_runtime": 361.5035,
855
+ "eval_samples_per_second": 2.766,
856
+ "eval_steps_per_second": 0.346,
857
+ "step": 480
858
+ },
859
+ {
860
+ "epoch": 0.43555555555555553,
861
+ "grad_norm": 82.9616470336914,
862
+ "learning_rate": 3.4746048188948806e-06,
863
+ "logits/chosen": -0.06675051152706146,
864
+ "logits/rejected": -0.07860895991325378,
865
+ "logps/chosen": -4.162237167358398,
866
+ "logps/rejected": -54.77789306640625,
867
+ "loss": 0.2979,
868
+ "rewards/accuracies": 0.9125000238418579,
869
+ "rewards/chosen": 17.047603607177734,
870
+ "rewards/margins": 5.138361930847168,
871
+ "rewards/rejected": 11.909242630004883,
872
+ "step": 490
873
+ },
874
+ {
875
+ "epoch": 0.4444444444444444,
876
+ "grad_norm": 0.04293210059404373,
877
+ "learning_rate": 3.4026784575644887e-06,
878
+ "logits/chosen": -0.06424491107463837,
879
+ "logits/rejected": -0.07567107677459717,
880
+ "logps/chosen": -2.05729603767395,
881
+ "logps/rejected": -56.646087646484375,
882
+ "loss": 0.4378,
883
+ "rewards/accuracies": 0.8875000476837158,
884
+ "rewards/chosen": 16.947803497314453,
885
+ "rewards/margins": 4.919981956481934,
886
+ "rewards/rejected": 12.02782154083252,
887
+ "step": 500
888
+ }
889
+ ],
890
+ "logging_steps": 10,
891
+ "max_steps": 1125,
892
+ "num_input_tokens_seen": 0,
893
+ "num_train_epochs": 1,
894
+ "save_steps": 500,
895
+ "stateful_callbacks": {
896
+ "TrainerControl": {
897
+ "args": {
898
+ "should_epoch_stop": false,
899
+ "should_evaluate": false,
900
+ "should_log": false,
901
+ "should_save": true,
902
+ "should_training_stop": false
903
+ },
904
+ "attributes": {}
905
+ }
906
+ },
907
+ "total_flos": 6.372648598634496e+17,
908
+ "train_batch_size": 4,
909
+ "trial_name": null,
910
+ "trial_params": null
911
+ }
checkpoint-500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5286bf380083ef64133f25e161ddfb8549ff44790e432f478753f221ef89e695
3
+ size 5368
eval_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": 0.0034948738757520914,
4
+ "eval_logits/rejected": -0.007565807551145554,
5
+ "eval_logps/chosen": -2.20627498626709,
6
+ "eval_logps/rejected": -60.60332489013672,
7
+ "eval_loss": 0.15350937843322754,
8
+ "eval_rewards/accuracies": 0.9610000848770142,
9
+ "eval_rewards/chosen": 17.28226661682129,
10
+ "eval_rewards/margins": 5.981877326965332,
11
+ "eval_rewards/rejected": 11.30038833618164,
12
+ "eval_runtime": 361.5652,
13
+ "eval_samples_per_second": 2.766,
14
+ "eval_steps_per_second": 0.346
15
+ }
runs/Dec18_22-22-59_yadi/events.out.tfevents.1734560709.yadi.436386.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:697250b13dc063f122f3cccdacb9ae4ca8a13cbea14f3d9f6c0083f67e62277a
3
+ size 95792
runs/Dec18_22-22-59_yadi/events.out.tfevents.1734574921.yadi.436386.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f0ff4dbe534e3fef2d0e8a554d703d5b9903029ef4e11f5a8c6830e79a5bfe
3
+ size 828
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
3
+ size 17078292
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 1.4338459346927616e+18,
4
+ "train_loss": 0.23597783709896936,
5
+ "train_runtime": 13850.7044,
6
+ "train_samples_per_second": 0.65,
7
+ "train_steps_per_second": 0.081
8
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 10, "total_steps": 1125, "loss": 1.1038, "accuracy": 0.5125000476837158, "lr": 4.4247787610619474e-07, "epoch": 0.008888888888888889, "percentage": 0.89, "elapsed_time": "0:01:11", "remaining_time": "2:12:28"}
2
+ {"current_steps": 20, "total_steps": 1125, "loss": 1.1887, "accuracy": 0.4000000059604645, "lr": 8.849557522123895e-07, "epoch": 0.017777777777777778, "percentage": 1.78, "elapsed_time": "0:02:19", "remaining_time": "2:08:48"}
3
+ {"current_steps": 30, "total_steps": 1125, "loss": 1.0563, "accuracy": 0.44999998807907104, "lr": 1.3274336283185843e-06, "epoch": 0.02666666666666667, "percentage": 2.67, "elapsed_time": "0:03:27", "remaining_time": "2:06:15"}
4
+ {"current_steps": 40, "total_steps": 1125, "loss": 0.7596, "accuracy": 0.5, "lr": 1.769911504424779e-06, "epoch": 0.035555555555555556, "percentage": 3.56, "elapsed_time": "0:04:36", "remaining_time": "2:04:51"}
5
+ {"current_steps": 50, "total_steps": 1125, "loss": 0.6083, "accuracy": 0.4625000059604645, "lr": 2.212389380530974e-06, "epoch": 0.044444444444444446, "percentage": 4.44, "elapsed_time": "0:05:44", "remaining_time": "2:03:19"}
6
+ {"current_steps": 60, "total_steps": 1125, "loss": 0.3835, "accuracy": 0.9125000238418579, "lr": 2.6548672566371687e-06, "epoch": 0.05333333333333334, "percentage": 5.33, "elapsed_time": "0:06:51", "remaining_time": "2:01:46"}
7
+ {"current_steps": 60, "total_steps": 1125, "eval_loss": 0.3286525011062622, "epoch": 0.05333333333333334, "percentage": 5.33, "elapsed_time": "0:13:03", "remaining_time": "3:51:50"}
8
+ {"current_steps": 70, "total_steps": 1125, "loss": 0.2931, "accuracy": 0.9375, "lr": 3.097345132743363e-06, "epoch": 0.06222222222222222, "percentage": 6.22, "elapsed_time": "0:14:08", "remaining_time": "3:33:12"}
9
+ {"current_steps": 80, "total_steps": 1125, "loss": 0.202, "accuracy": 0.9750000238418579, "lr": 3.539823008849558e-06, "epoch": 0.07111111111111111, "percentage": 7.11, "elapsed_time": "0:15:13", "remaining_time": "3:18:57"}
10
+ {"current_steps": 90, "total_steps": 1125, "loss": 0.2123, "accuracy": 0.925000011920929, "lr": 3.982300884955752e-06, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:16:19", "remaining_time": "3:07:39"}
11
+ {"current_steps": 100, "total_steps": 1125, "loss": 0.2592, "accuracy": 0.9375, "lr": 4.424778761061948e-06, "epoch": 0.08888888888888889, "percentage": 8.89, "elapsed_time": "0:17:24", "remaining_time": "2:58:22"}
12
+ {"current_steps": 110, "total_steps": 1125, "loss": 0.1336, "accuracy": 0.949999988079071, "lr": 4.867256637168142e-06, "epoch": 0.09777777777777778, "percentage": 9.78, "elapsed_time": "0:18:29", "remaining_time": "2:50:35"}
13
+ {"current_steps": 120, "total_steps": 1125, "loss": 0.2552, "accuracy": 0.8999999761581421, "lr": 4.999409761242696e-06, "epoch": 0.10666666666666667, "percentage": 10.67, "elapsed_time": "0:19:34", "remaining_time": "2:43:55"}
14
+ {"current_steps": 120, "total_steps": 1125, "eval_loss": 0.18996010720729828, "epoch": 0.10666666666666667, "percentage": 10.67, "elapsed_time": "0:25:35", "remaining_time": "3:34:23"}
15
+ {"current_steps": 130, "total_steps": 1125, "loss": 0.1394, "accuracy": 0.9624999761581421, "lr": 4.996519466816778e-06, "epoch": 0.11555555555555555, "percentage": 11.56, "elapsed_time": "0:26:41", "remaining_time": "3:24:16"}
16
+ {"current_steps": 140, "total_steps": 1125, "loss": 0.1408, "accuracy": 0.9750000238418579, "lr": 4.9912234871722805e-06, "epoch": 0.12444444444444444, "percentage": 12.44, "elapsed_time": "0:27:46", "remaining_time": "3:15:25"}
17
+ {"current_steps": 150, "total_steps": 1125, "loss": 0.2153, "accuracy": 0.9375, "lr": 4.98352692559805e-06, "epoch": 0.13333333333333333, "percentage": 13.33, "elapsed_time": "0:28:51", "remaining_time": "3:07:35"}
18
+ {"current_steps": 160, "total_steps": 1125, "loss": 0.0388, "accuracy": 0.9750000238418579, "lr": 4.973437198621237e-06, "epoch": 0.14222222222222222, "percentage": 14.22, "elapsed_time": "0:29:56", "remaining_time": "3:00:36"}
19
+ {"current_steps": 170, "total_steps": 1125, "loss": 0.4651, "accuracy": 0.875, "lr": 4.960964028860621e-06, "epoch": 0.1511111111111111, "percentage": 15.11, "elapsed_time": "0:31:01", "remaining_time": "2:54:18"}
20
+ {"current_steps": 180, "total_steps": 1125, "loss": 0.2362, "accuracy": 0.925000011920929, "lr": 4.946119435657738e-06, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:32:06", "remaining_time": "2:48:34"}
21
+ {"current_steps": 180, "total_steps": 1125, "eval_loss": 0.20236633718013763, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:38:08", "remaining_time": "3:20:12"}
22
+ {"current_steps": 190, "total_steps": 1125, "loss": 0.2442, "accuracy": 0.9500000476837158, "lr": 4.928917723494854e-06, "epoch": 0.1688888888888889, "percentage": 16.89, "elapsed_time": "0:39:13", "remaining_time": "3:12:59"}
23
+ {"current_steps": 200, "total_steps": 1125, "loss": 0.1915, "accuracy": 0.9500000476837158, "lr": 4.909375468210947e-06, "epoch": 0.17777777777777778, "percentage": 17.78, "elapsed_time": "0:40:18", "remaining_time": "3:06:24"}
24
+ {"current_steps": 210, "total_steps": 1125, "loss": 0.2843, "accuracy": 0.9375, "lr": 4.8875115010289655e-06, "epoch": 0.18666666666666668, "percentage": 18.67, "elapsed_time": "0:41:23", "remaining_time": "3:00:19"}
25
+ {"current_steps": 220, "total_steps": 1125, "loss": 0.5104, "accuracy": 0.875, "lr": 4.863346890409768e-06, "epoch": 0.19555555555555557, "percentage": 19.56, "elapsed_time": "0:42:28", "remaining_time": "2:54:41"}
26
+ {"current_steps": 230, "total_steps": 1125, "loss": 0.2134, "accuracy": 0.925000011920929, "lr": 4.836904921750224e-06, "epoch": 0.20444444444444446, "percentage": 20.44, "elapsed_time": "0:43:33", "remaining_time": "2:49:28"}
27
+ {"current_steps": 240, "total_steps": 1125, "loss": 0.1781, "accuracy": 0.9500000476837158, "lr": 4.808211074945042e-06, "epoch": 0.21333333333333335, "percentage": 21.33, "elapsed_time": "0:44:38", "remaining_time": "2:44:35"}
28
+ {"current_steps": 240, "total_steps": 1125, "eval_loss": 0.1546352356672287, "epoch": 0.21333333333333335, "percentage": 21.33, "elapsed_time": "0:50:39", "remaining_time": "3:06:48"}
29
+ {"current_steps": 250, "total_steps": 1125, "loss": 0.1373, "accuracy": 0.9375, "lr": 4.7772929998339485e-06, "epoch": 0.2222222222222222, "percentage": 22.22, "elapsed_time": "0:51:44", "remaining_time": "3:01:05"}
30
+ {"current_steps": 260, "total_steps": 1125, "loss": 0.1896, "accuracy": 0.949999988079071, "lr": 4.744180489557859e-06, "epoch": 0.2311111111111111, "percentage": 23.11, "elapsed_time": "0:52:49", "remaining_time": "2:55:44"}
31
+ {"current_steps": 270, "total_steps": 1125, "loss": 0.2315, "accuracy": 0.9375, "lr": 4.708905451849754e-06, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:53:54", "remaining_time": "2:50:42"}
32
+ {"current_steps": 280, "total_steps": 1125, "loss": 0.5343, "accuracy": 0.862500011920929, "lr": 4.671501878287879e-06, "epoch": 0.24888888888888888, "percentage": 24.89, "elapsed_time": "0:54:59", "remaining_time": "2:45:56"}
33
+ {"current_steps": 290, "total_steps": 1125, "loss": 0.5017, "accuracy": 0.887499988079071, "lr": 4.6320058115409295e-06, "epoch": 0.2577777777777778, "percentage": 25.78, "elapsed_time": "0:56:04", "remaining_time": "2:41:26"}
34
+ {"current_steps": 300, "total_steps": 1125, "loss": 0.265, "accuracy": 0.925000011920929, "lr": 4.590455310636778e-06, "epoch": 0.26666666666666666, "percentage": 26.67, "elapsed_time": "0:57:09", "remaining_time": "2:37:09"}
35
+ {"current_steps": 300, "total_steps": 1125, "eval_loss": 0.15360687673091888, "epoch": 0.26666666666666666, "percentage": 26.67, "elapsed_time": "1:03:10", "remaining_time": "2:53:44"}
36
+ {"current_steps": 310, "total_steps": 1125, "loss": 0.1132, "accuracy": 0.9500000476837158, "lr": 4.54689041428819e-06, "epoch": 0.27555555555555555, "percentage": 27.56, "elapsed_time": "1:04:15", "remaining_time": "2:48:56"}
37
+ {"current_steps": 320, "total_steps": 1125, "loss": 0.1194, "accuracy": 0.9500000476837158, "lr": 4.501353102310901e-06, "epoch": 0.28444444444444444, "percentage": 28.44, "elapsed_time": "1:05:21", "remaining_time": "2:44:25"}
38
+ {"current_steps": 330, "total_steps": 1125, "loss": 0.0926, "accuracy": 0.9500000476837158, "lr": 4.453887255171206e-06, "epoch": 0.29333333333333333, "percentage": 29.33, "elapsed_time": "1:06:26", "remaining_time": "2:40:04"}
39
+ {"current_steps": 340, "total_steps": 1125, "loss": 0.2873, "accuracy": 0.925000011920929, "lr": 4.404538611702055e-06, "epoch": 0.3022222222222222, "percentage": 30.22, "elapsed_time": "1:07:31", "remaining_time": "2:35:54"}
40
+ {"current_steps": 350, "total_steps": 1125, "loss": 0.4332, "accuracy": 0.875, "lr": 4.3533547250284015e-06, "epoch": 0.3111111111111111, "percentage": 31.11, "elapsed_time": "1:08:36", "remaining_time": "2:31:54"}
41
+ {"current_steps": 360, "total_steps": 1125, "loss": 0.1605, "accuracy": 0.9500000476837158, "lr": 4.300384916744261e-06, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "1:09:41", "remaining_time": "2:28:05"}
42
+ {"current_steps": 360, "total_steps": 1125, "eval_loss": 0.3194349706172943, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "1:15:42", "remaining_time": "2:40:53"}
43
+ {"current_steps": 370, "total_steps": 1125, "loss": 0.1711, "accuracy": 0.9500000476837158, "lr": 4.24568022938566e-06, "epoch": 0.3288888888888889, "percentage": 32.89, "elapsed_time": "1:16:47", "remaining_time": "2:36:42"}
44
+ {"current_steps": 380, "total_steps": 1125, "loss": 0.4359, "accuracy": 0.8875000476837158, "lr": 4.189293377245241e-06, "epoch": 0.3377777777777778, "percentage": 33.78, "elapsed_time": "1:17:52", "remaining_time": "2:32:40"}
45
+ {"current_steps": 390, "total_steps": 1125, "loss": 0.2161, "accuracy": 0.949999988079071, "lr": 4.131278695575952e-06, "epoch": 0.3466666666666667, "percentage": 34.67, "elapsed_time": "1:18:57", "remaining_time": "2:28:48"}
46
+ {"current_steps": 400, "total_steps": 1125, "loss": 0.2077, "accuracy": 0.925000011920929, "lr": 4.071692088232743e-06, "epoch": 0.35555555555555557, "percentage": 35.56, "elapsed_time": "1:20:02", "remaining_time": "2:25:04"}
47
+ {"current_steps": 410, "total_steps": 1125, "loss": 0.2478, "accuracy": 0.8875000476837158, "lr": 4.010590973802737e-06, "epoch": 0.36444444444444446, "percentage": 36.44, "elapsed_time": "1:21:07", "remaining_time": "2:21:27"}
48
+ {"current_steps": 420, "total_steps": 1125, "loss": 0.2894, "accuracy": 0.8999999761581421, "lr": 3.948034230275781e-06, "epoch": 0.37333333333333335, "percentage": 37.33, "elapsed_time": "1:22:12", "remaining_time": "2:17:58"}
49
+ {"current_steps": 420, "total_steps": 1125, "eval_loss": 0.16789735853672028, "epoch": 0.37333333333333335, "percentage": 37.33, "elapsed_time": "1:28:13", "remaining_time": "2:28:05"}
50
+ {"current_steps": 430, "total_steps": 1125, "loss": 0.2298, "accuracy": 0.9500000476837158, "lr": 3.884082138308699e-06, "epoch": 0.38222222222222224, "percentage": 38.22, "elapsed_time": "1:29:18", "remaining_time": "2:24:20"}
51
+ {"current_steps": 440, "total_steps": 1125, "loss": 0.2513, "accuracy": 0.9375, "lr": 3.818796323137896e-06, "epoch": 0.39111111111111113, "percentage": 39.11, "elapsed_time": "1:30:23", "remaining_time": "2:20:43"}
52
+ {"current_steps": 450, "total_steps": 1125, "loss": 0.1758, "accuracy": 0.9500000476837158, "lr": 3.7522396951963303e-06, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "1:31:28", "remaining_time": "2:17:12"}
53
+ {"current_steps": 460, "total_steps": 1125, "loss": 0.1981, "accuracy": 0.9500000476837158, "lr": 3.684476389492026e-06, "epoch": 0.4088888888888889, "percentage": 40.89, "elapsed_time": "1:32:33", "remaining_time": "2:13:48"}
54
+ {"current_steps": 470, "total_steps": 1125, "loss": 0.2066, "accuracy": 0.9375, "lr": 3.6155717038065783e-06, "epoch": 0.4177777777777778, "percentage": 41.78, "elapsed_time": "1:33:38", "remaining_time": "2:10:29"}
55
+ {"current_steps": 480, "total_steps": 1125, "loss": 0.1149, "accuracy": 0.9500000476837158, "lr": 3.545592035773192e-06, "epoch": 0.4266666666666667, "percentage": 42.67, "elapsed_time": "1:34:43", "remaining_time": "2:07:16"}
56
+ {"current_steps": 480, "total_steps": 1125, "eval_loss": 0.29511645436286926, "epoch": 0.4266666666666667, "percentage": 42.67, "elapsed_time": "1:40:44", "remaining_time": "2:15:22"}
57
+ {"current_steps": 490, "total_steps": 1125, "loss": 0.2979, "accuracy": 0.9125000238418579, "lr": 3.4746048188948806e-06, "epoch": 0.43555555555555553, "percentage": 43.56, "elapsed_time": "1:41:49", "remaining_time": "2:11:57"}
58
+ {"current_steps": 500, "total_steps": 1125, "loss": 0.4378, "accuracy": 0.8875000476837158, "lr": 3.4026784575644887e-06, "epoch": 0.4444444444444444, "percentage": 44.44, "elapsed_time": "1:42:54", "remaining_time": "2:08:37"}
59
+ {"current_steps": 510, "total_steps": 1125, "loss": 0.2613, "accuracy": 0.9375, "lr": 3.329882261149148e-06, "epoch": 0.4533333333333333, "percentage": 45.33, "elapsed_time": "1:44:00", "remaining_time": "2:05:25"}
60
+ {"current_steps": 520, "total_steps": 1125, "loss": 0.1517, "accuracy": 0.9375, "lr": 3.25628637720269e-06, "epoch": 0.4622222222222222, "percentage": 46.22, "elapsed_time": "1:45:05", "remaining_time": "2:02:16"}
61
+ {"current_steps": 530, "total_steps": 1125, "loss": 0.2304, "accuracy": 0.9375, "lr": 3.181961723870359e-06, "epoch": 0.4711111111111111, "percentage": 47.11, "elapsed_time": "1:46:10", "remaining_time": "1:59:11"}
62
+ {"current_steps": 540, "total_steps": 1125, "loss": 0.0384, "accuracy": 0.987500011920929, "lr": 3.1069799215509847e-06, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "1:47:15", "remaining_time": "1:56:11"}
63
+ {"current_steps": 540, "total_steps": 1125, "eval_loss": 0.17387841641902924, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "1:53:16", "remaining_time": "2:02:43"}
64
+ {"current_steps": 550, "total_steps": 1125, "loss": 0.2087, "accuracy": 0.9125000238418579, "lr": 3.0314132238824416e-06, "epoch": 0.4888888888888889, "percentage": 48.89, "elapsed_time": "1:54:21", "remaining_time": "1:59:33"}
65
+ {"current_steps": 560, "total_steps": 1125, "loss": 0.0969, "accuracy": 0.9624999761581421, "lr": 2.955334448116915e-06, "epoch": 0.49777777777777776, "percentage": 49.78, "elapsed_time": "1:55:26", "remaining_time": "1:56:28"}
66
+ {"current_steps": 570, "total_steps": 1125, "loss": 0.1447, "accuracy": 0.949999988079071, "lr": 2.8788169049530533e-06, "epoch": 0.5066666666666667, "percentage": 50.67, "elapsed_time": "1:56:31", "remaining_time": "1:53:27"}
67
+ {"current_steps": 580, "total_steps": 1125, "loss": 0.0998, "accuracy": 0.949999988079071, "lr": 2.8019343278926397e-06, "epoch": 0.5155555555555555, "percentage": 51.56, "elapsed_time": "1:57:36", "remaining_time": "1:50:30"}
68
+ {"current_steps": 590, "total_steps": 1125, "loss": 0.1162, "accuracy": 0.9750000238418579, "lr": 2.7247608021898265e-06, "epoch": 0.5244444444444445, "percentage": 52.44, "elapsed_time": "1:58:41", "remaining_time": "1:47:37"}
69
+ {"current_steps": 600, "total_steps": 1125, "loss": 0.4008, "accuracy": 0.8875000476837158, "lr": 2.647370693461432e-06, "epoch": 0.5333333333333333, "percentage": 53.33, "elapsed_time": "1:59:46", "remaining_time": "1:44:47"}
70
+ {"current_steps": 600, "total_steps": 1125, "eval_loss": 0.17061151564121246, "epoch": 0.5333333333333333, "percentage": 53.33, "elapsed_time": "2:05:47", "remaining_time": "1:50:04"}
71
+ {"current_steps": 610, "total_steps": 1125, "loss": 0.1265, "accuracy": 0.9624999761581421, "lr": 2.569838576027068e-06, "epoch": 0.5422222222222223, "percentage": 54.22, "elapsed_time": "2:06:52", "remaining_time": "1:47:06"}
72
+ {"current_steps": 620, "total_steps": 1125, "loss": 0.097, "accuracy": 0.9624999761581421, "lr": 2.4922391610481544e-06, "epoch": 0.5511111111111111, "percentage": 55.11, "elapsed_time": "2:07:57", "remaining_time": "1:44:13"}
73
+ {"current_steps": 630, "total_steps": 1125, "loss": 0.3115, "accuracy": 0.925000011920929, "lr": 2.4146472245350804e-06, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "2:09:02", "remaining_time": "1:41:23"}
74
+ {"current_steps": 640, "total_steps": 1125, "loss": 0.1268, "accuracy": 0.9500000476837158, "lr": 2.337137535291868e-06, "epoch": 0.5688888888888889, "percentage": 56.89, "elapsed_time": "2:10:07", "remaining_time": "1:38:36"}
75
+ {"current_steps": 650, "total_steps": 1125, "loss": 0.1831, "accuracy": 0.9500000476837158, "lr": 2.259784782867782e-06, "epoch": 0.5777777777777777, "percentage": 57.78, "elapsed_time": "2:11:12", "remaining_time": "1:35:52"}
76
+ {"current_steps": 660, "total_steps": 1125, "loss": 0.1678, "accuracy": 0.9624999761581421, "lr": 2.182663505585314e-06, "epoch": 0.5866666666666667, "percentage": 58.67, "elapsed_time": "2:12:17", "remaining_time": "1:33:11"}
77
+ {"current_steps": 660, "total_steps": 1125, "eval_loss": 0.2050127536058426, "epoch": 0.5866666666666667, "percentage": 58.67, "elapsed_time": "2:18:18", "remaining_time": "1:37:26"}
78
+ {"current_steps": 670, "total_steps": 1125, "loss": 0.1025, "accuracy": 0.9624999761581421, "lr": 2.1058480187138863e-06, "epoch": 0.5955555555555555, "percentage": 59.56, "elapsed_time": "2:19:23", "remaining_time": "1:34:39"}
79
+ {"current_steps": 680, "total_steps": 1125, "loss": 0.1961, "accuracy": 0.9500000476837158, "lr": 2.0294123428584985e-06, "epoch": 0.6044444444444445, "percentage": 60.44, "elapsed_time": "2:20:28", "remaining_time": "1:31:55"}
80
+ {"current_steps": 690, "total_steps": 1125, "loss": 0.2821, "accuracy": 0.9375, "lr": 1.953430132632311e-06, "epoch": 0.6133333333333333, "percentage": 61.33, "elapsed_time": "2:21:33", "remaining_time": "1:29:14"}
81
+ {"current_steps": 700, "total_steps": 1125, "loss": 0.3141, "accuracy": 0.9375, "lr": 1.8779746056819104e-06, "epoch": 0.6222222222222222, "percentage": 62.22, "elapsed_time": "2:22:38", "remaining_time": "1:26:36"}
82
+ {"current_steps": 710, "total_steps": 1125, "loss": 0.3827, "accuracy": 0.887499988079071, "lr": 1.8031184721336364e-06, "epoch": 0.6311111111111111, "percentage": 63.11, "elapsed_time": "2:23:43", "remaining_time": "1:24:00"}
83
+ {"current_steps": 720, "total_steps": 1125, "loss": 0.2272, "accuracy": 0.949999988079071, "lr": 1.7289338645289711e-06, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "2:24:48", "remaining_time": "1:21:27"}
84
+ {"current_steps": 720, "total_steps": 1125, "eval_loss": 0.14018221199512482, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "2:30:49", "remaining_time": "1:24:50"}
85
+ {"current_steps": 730, "total_steps": 1125, "loss": 0.1171, "accuracy": 0.9624999761581421, "lr": 1.6554922683164875e-06, "epoch": 0.6488888888888888, "percentage": 64.89, "elapsed_time": "2:31:54", "remaining_time": "1:22:11"}
86
+ {"current_steps": 740, "total_steps": 1125, "loss": 0.2001, "accuracy": 0.9624999761581421, "lr": 1.5828644529673592e-06, "epoch": 0.6577777777777778, "percentage": 65.78, "elapsed_time": "2:32:59", "remaining_time": "1:19:35"}
87
+ {"current_steps": 750, "total_steps": 1125, "loss": 0.1692, "accuracy": 0.9375, "lr": 1.5111204037807844e-06, "epoch": 0.6666666666666666, "percentage": 66.67, "elapsed_time": "2:34:04", "remaining_time": "1:17:02"}
88
+ {"current_steps": 760, "total_steps": 1125, "loss": 0.2418, "accuracy": 0.949999988079071, "lr": 1.4403292544450625e-06, "epoch": 0.6755555555555556, "percentage": 67.56, "elapsed_time": "2:35:09", "remaining_time": "1:14:30"}
89
+ {"current_steps": 770, "total_steps": 1125, "loss": 0.1749, "accuracy": 0.9500000476837158, "lr": 1.3705592204192853e-06, "epoch": 0.6844444444444444, "percentage": 68.44, "elapsed_time": "2:36:14", "remaining_time": "1:12:01"}
90
+ {"current_steps": 780, "total_steps": 1125, "loss": 0.1915, "accuracy": 0.925000011920929, "lr": 1.301877533199859e-06, "epoch": 0.6933333333333334, "percentage": 69.33, "elapsed_time": "2:37:19", "remaining_time": "1:09:34"}
91
+ {"current_steps": 780, "total_steps": 1125, "eval_loss": 0.24412688612937927, "epoch": 0.6933333333333334, "percentage": 69.33, "elapsed_time": "2:43:20", "remaining_time": "1:12:14"}
92
+ {"current_steps": 790, "total_steps": 1125, "loss": 0.2143, "accuracy": 0.9375, "lr": 1.2343503755351729e-06, "epoch": 0.7022222222222222, "percentage": 70.22, "elapsed_time": "2:44:25", "remaining_time": "1:09:43"}
93
+ {"current_steps": 800, "total_steps": 1125, "loss": 0.0792, "accuracy": 0.9750000238418579, "lr": 1.168042817650881e-06, "epoch": 0.7111111111111111, "percentage": 71.11, "elapsed_time": "2:45:30", "remaining_time": "1:07:14"}
94
+ {"current_steps": 810, "total_steps": 1125, "loss": 0.3177, "accuracy": 0.925000011920929, "lr": 1.1030187545472012e-06, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "2:46:35", "remaining_time": "1:04:47"}
95
+ {"current_steps": 820, "total_steps": 1125, "loss": 0.2128, "accuracy": 0.949999988079071, "lr": 1.0393408444287048e-06, "epoch": 0.7288888888888889, "percentage": 72.89, "elapsed_time": "2:47:40", "remaining_time": "1:02:21"}
96
+ {"current_steps": 830, "total_steps": 1125, "loss": 0.1296, "accuracy": 0.9750000238418579, "lr": 9.770704483258782e-07, "epoch": 0.7377777777777778, "percentage": 73.78, "elapsed_time": "2:48:44", "remaining_time": "0:59:58"}
97
+ {"current_steps": 840, "total_steps": 1125, "loss": 0.0635, "accuracy": 0.987500011920929, "lr": 9.162675709666865e-07, "epoch": 0.7466666666666667, "percentage": 74.67, "elapsed_time": "2:49:49", "remaining_time": "0:57:37"}
98
+ {"current_steps": 840, "total_steps": 1125, "eval_loss": 0.1689341962337494, "epoch": 0.7466666666666667, "percentage": 74.67, "elapsed_time": "2:55:51", "remaining_time": "0:59:39"}
99
+ {"current_steps": 850, "total_steps": 1125, "loss": 0.1157, "accuracy": 0.9750000238418579, "lr": 8.569908029550686e-07, "epoch": 0.7555555555555555, "percentage": 75.56, "elapsed_time": "2:56:56", "remaining_time": "0:57:14"}
100
+ {"current_steps": 860, "total_steps": 1125, "loss": 0.2252, "accuracy": 0.9375, "lr": 7.992972643121227e-07, "epoch": 0.7644444444444445, "percentage": 76.44, "elapsed_time": "2:58:01", "remaining_time": "0:54:51"}
101
+ {"current_steps": 870, "total_steps": 1125, "loss": 0.1398, "accuracy": 0.9624999761581421, "lr": 7.432425494343509e-07, "epoch": 0.7733333333333333, "percentage": 77.33, "elapsed_time": "2:59:06", "remaining_time": "0:52:29"}
102
+ {"current_steps": 880, "total_steps": 1125, "loss": 0.2966, "accuracy": 0.9375, "lr": 6.888806735220396e-07, "epoch": 0.7822222222222223, "percentage": 78.22, "elapsed_time": "3:00:11", "remaining_time": "0:50:09"}
103
+ {"current_steps": 890, "total_steps": 1125, "loss": 0.1641, "accuracy": 0.9375, "lr": 6.362640205293583e-07, "epoch": 0.7911111111111111, "percentage": 79.11, "elapsed_time": "3:01:15", "remaining_time": "0:47:51"}
104
+ {"current_steps": 900, "total_steps": 1125, "loss": 0.1703, "accuracy": 0.9624999761581421, "lr": 5.854432926863684e-07, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "3:02:20", "remaining_time": "0:45:35"}
105
+ {"current_steps": 900, "total_steps": 1125, "eval_loss": 0.13995186984539032, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "3:08:22", "remaining_time": "0:47:05"}
106
+ {"current_steps": 910, "total_steps": 1125, "loss": 0.0507, "accuracy": 0.9750000238418579, "lr": 5.364674616415547e-07, "epoch": 0.8088888888888889, "percentage": 80.89, "elapsed_time": "3:09:27", "remaining_time": "0:44:45"}
107
+ {"current_steps": 920, "total_steps": 1125, "loss": 0.0881, "accuracy": 0.9500000476837158, "lr": 4.893837212719859e-07, "epoch": 0.8177777777777778, "percentage": 81.78, "elapsed_time": "3:10:31", "remaining_time": "0:42:27"}
108
+ {"current_steps": 930, "total_steps": 1125, "loss": 0.1373, "accuracy": 0.949999988079071, "lr": 4.442374422065493e-07, "epoch": 0.8266666666666667, "percentage": 82.67, "elapsed_time": "3:11:36", "remaining_time": "0:40:10"}
109
+ {"current_steps": 940, "total_steps": 1125, "loss": 0.2763, "accuracy": 0.925000011920929, "lr": 4.0107212810610974e-07, "epoch": 0.8355555555555556, "percentage": 83.56, "elapsed_time": "3:12:41", "remaining_time": "0:37:55"}
110
+ {"current_steps": 950, "total_steps": 1125, "loss": 0.1758, "accuracy": 0.949999988079071, "lr": 3.599293737426932e-07, "epoch": 0.8444444444444444, "percentage": 84.44, "elapsed_time": "3:13:46", "remaining_time": "0:35:41"}
111
+ {"current_steps": 960, "total_steps": 1125, "loss": 0.1138, "accuracy": 0.9624999761581421, "lr": 3.208488249181216e-07, "epoch": 0.8533333333333334, "percentage": 85.33, "elapsed_time": "3:14:51", "remaining_time": "0:33:29"}
112
+ {"current_steps": 960, "total_steps": 1125, "eval_loss": 0.1441129744052887, "epoch": 0.8533333333333334, "percentage": 85.33, "elapsed_time": "3:20:53", "remaining_time": "0:34:31"}
113
+ {"current_steps": 970, "total_steps": 1125, "loss": 0.2425, "accuracy": 0.925000011920929, "lr": 2.838681402606952e-07, "epoch": 0.8622222222222222, "percentage": 86.22, "elapsed_time": "3:21:57", "remaining_time": "0:32:16"}
114
+ {"current_steps": 980, "total_steps": 1125, "loss": 0.0609, "accuracy": 0.9750000238418579, "lr": 2.490229549367443e-07, "epoch": 0.8711111111111111, "percentage": 87.11, "elapsed_time": "3:23:02", "remaining_time": "0:30:02"}
115
+ {"current_steps": 990, "total_steps": 1125, "loss": 0.3464, "accuracy": 0.8999999761581421, "lr": 2.1634684631203412e-07, "epoch": 0.88, "percentage": 88.0, "elapsed_time": "3:24:07", "remaining_time": "0:27:50"}
116
+ {"current_steps": 1000, "total_steps": 1125, "loss": 0.0063, "accuracy": 1.0, "lr": 1.8587130159608196e-07, "epoch": 0.8888888888888888, "percentage": 88.89, "elapsed_time": "3:25:12", "remaining_time": "0:25:39"}
117
+ {"current_steps": 1010, "total_steps": 1125, "loss": 0.2741, "accuracy": 0.925000011920929, "lr": 1.5762568750059604e-07, "epoch": 0.8977777777777778, "percentage": 89.78, "elapsed_time": "3:26:18", "remaining_time": "0:23:29"}
118
+ {"current_steps": 1020, "total_steps": 1125, "loss": 0.0513, "accuracy": 0.9750000238418579, "lr": 1.316372219412454e-07, "epoch": 0.9066666666666666, "percentage": 90.67, "elapsed_time": "3:27:23", "remaining_time": "0:21:20"}
119
+ {"current_steps": 1020, "total_steps": 1125, "eval_loss": 0.14118175208568573, "epoch": 0.9066666666666666, "percentage": 90.67, "elapsed_time": "3:33:25", "remaining_time": "0:21:58"}
120
+ {"current_steps": 1030, "total_steps": 1125, "loss": 0.0349, "accuracy": 0.987500011920929, "lr": 1.0793094781005792e-07, "epoch": 0.9155555555555556, "percentage": 91.56, "elapsed_time": "3:34:29", "remaining_time": "0:19:47"}
121
+ {"current_steps": 1040, "total_steps": 1125, "loss": 0.1927, "accuracy": 0.9500000476837158, "lr": 8.652970884369255e-08, "epoch": 0.9244444444444444, "percentage": 92.44, "elapsed_time": "3:35:34", "remaining_time": "0:17:37"}
122
+ {"current_steps": 1050, "total_steps": 1125, "loss": 0.1317, "accuracy": 0.9750000238418579, "lr": 6.745412761086007e-08, "epoch": 0.9333333333333333, "percentage": 93.33, "elapsed_time": "3:36:39", "remaining_time": "0:15:28"}
123
+ {"current_steps": 1060, "total_steps": 1125, "loss": 0.2279, "accuracy": 0.9375, "lr": 5.0722585640090305e-08, "epoch": 0.9422222222222222, "percentage": 94.22, "elapsed_time": "3:37:44", "remaining_time": "0:13:21"}
124
+ {"current_steps": 1070, "total_steps": 1125, "loss": 0.1021, "accuracy": 0.9624999761581421, "lr": 3.635120570700784e-08, "epoch": 0.9511111111111111, "percentage": 95.11, "elapsed_time": "3:38:49", "remaining_time": "0:11:14"}
125
+ {"current_steps": 1080, "total_steps": 1125, "loss": 0.1189, "accuracy": 0.9750000238418579, "lr": 2.4353836298169343e-08, "epoch": 0.96, "percentage": 96.0, "elapsed_time": "3:39:54", "remaining_time": "0:09:09"}
126
+ {"current_steps": 1080, "total_steps": 1125, "eval_loss": 0.15081512928009033, "epoch": 0.96, "percentage": 96.0, "elapsed_time": "3:45:55", "remaining_time": "0:09:24"}
127
+ {"current_steps": 1090, "total_steps": 1125, "loss": 0.0728, "accuracy": 0.9750000238418579, "lr": 1.4742038266447046e-08, "epoch": 0.9688888888888889, "percentage": 96.89, "elapsed_time": "3:47:00", "remaining_time": "0:07:17"}
128
+ {"current_steps": 1100, "total_steps": 1125, "loss": 0.0357, "accuracy": 0.9750000238418579, "lr": 7.525073690809737e-09, "epoch": 0.9777777777777777, "percentage": 97.78, "elapsed_time": "3:48:05", "remaining_time": "0:05:11"}
129
+ {"current_steps": 1110, "total_steps": 1125, "loss": 0.1973, "accuracy": 0.949999988079071, "lr": 2.709896951238744e-09, "epoch": 0.9866666666666667, "percentage": 98.67, "elapsed_time": "3:49:10", "remaining_time": "0:03:05"}
130
+ {"current_steps": 1120, "total_steps": 1125, "loss": 0.0538, "accuracy": 0.987500011920929, "lr": 3.0114802737818415e-10, "epoch": 0.9955555555555555, "percentage": 99.56, "elapsed_time": "3:50:15", "remaining_time": "0:01:01"}
131
+ {"current_steps": 1125, "total_steps": 1125, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "3:50:48", "remaining_time": "0:00:00"}
trainer_state.json ADDED
@@ -0,0 +1,2010 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 60,
6
+ "global_step": 1125,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.008888888888888889,
13
+ "grad_norm": 8.760091781616211,
14
+ "learning_rate": 4.4247787610619474e-07,
15
+ "logits/chosen": -0.8248252868652344,
16
+ "logits/rejected": -0.8263720273971558,
17
+ "logps/chosen": -0.36086463928222656,
18
+ "logps/rejected": -5.696224689483643,
19
+ "loss": 1.1038,
20
+ "rewards/accuracies": 0.5125000476837158,
21
+ "rewards/chosen": 17.43745994567871,
22
+ "rewards/margins": 0.5984855890274048,
23
+ "rewards/rejected": 16.838973999023438,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.017777777777777778,
28
+ "grad_norm": 8.855981826782227,
29
+ "learning_rate": 8.849557522123895e-07,
30
+ "logits/chosen": -0.8169006109237671,
31
+ "logits/rejected": -0.819770872592926,
32
+ "logps/chosen": -0.12464660406112671,
33
+ "logps/rejected": -7.139842987060547,
34
+ "loss": 1.1887,
35
+ "rewards/accuracies": 0.4000000059604645,
36
+ "rewards/chosen": 17.17649269104004,
37
+ "rewards/margins": 0.19107049703598022,
38
+ "rewards/rejected": 16.98542022705078,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.02666666666666667,
43
+ "grad_norm": 16.764184951782227,
44
+ "learning_rate": 1.3274336283185843e-06,
45
+ "logits/chosen": -0.8003113865852356,
46
+ "logits/rejected": -0.8030117750167847,
47
+ "logps/chosen": -0.34651467204093933,
48
+ "logps/rejected": -6.967917442321777,
49
+ "loss": 1.0563,
50
+ "rewards/accuracies": 0.44999998807907104,
51
+ "rewards/chosen": 17.280975341796875,
52
+ "rewards/margins": 0.40005987882614136,
53
+ "rewards/rejected": 16.88091468811035,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.035555555555555556,
58
+ "grad_norm": 8.33682918548584,
59
+ "learning_rate": 1.769911504424779e-06,
60
+ "logits/chosen": -0.7695047855377197,
61
+ "logits/rejected": -0.7739207148551941,
62
+ "logps/chosen": -1.5993006229400635,
63
+ "logps/rejected": -8.504932403564453,
64
+ "loss": 0.7596,
65
+ "rewards/accuracies": 0.5,
66
+ "rewards/chosen": 17.283912658691406,
67
+ "rewards/margins": 0.6976072192192078,
68
+ "rewards/rejected": 16.5863037109375,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.044444444444444446,
73
+ "grad_norm": 4.494723320007324,
74
+ "learning_rate": 2.212389380530974e-06,
75
+ "logits/chosen": -0.7154140472412109,
76
+ "logits/rejected": -0.7225576043128967,
77
+ "logps/chosen": -3.112199068069458,
78
+ "logps/rejected": -12.212080001831055,
79
+ "loss": 0.6083,
80
+ "rewards/accuracies": 0.4625000059604645,
81
+ "rewards/chosen": 17.03064727783203,
82
+ "rewards/margins": 0.7148451805114746,
83
+ "rewards/rejected": 16.3158016204834,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.05333333333333334,
88
+ "grad_norm": 5.110287666320801,
89
+ "learning_rate": 2.6548672566371687e-06,
90
+ "logits/chosen": -0.6322453022003174,
91
+ "logits/rejected": -0.6387485265731812,
92
+ "logps/chosen": -5.650620460510254,
93
+ "logps/rejected": -12.759811401367188,
94
+ "loss": 0.3835,
95
+ "rewards/accuracies": 0.9125000238418579,
96
+ "rewards/chosen": 17.101289749145508,
97
+ "rewards/margins": 1.1824612617492676,
98
+ "rewards/rejected": 15.918828964233398,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.05333333333333334,
103
+ "eval_logits/chosen": -0.5826543569564819,
104
+ "eval_logits/rejected": -0.5914276838302612,
105
+ "eval_logps/chosen": -3.5471787452697754,
106
+ "eval_logps/rejected": -16.51181983947754,
107
+ "eval_loss": 0.3286525011062622,
108
+ "eval_rewards/accuracies": 0.9280000925064087,
109
+ "eval_rewards/chosen": 17.148174285888672,
110
+ "eval_rewards/margins": 1.4386365413665771,
111
+ "eval_rewards/rejected": 15.709539413452148,
112
+ "eval_runtime": 372.0227,
113
+ "eval_samples_per_second": 2.688,
114
+ "eval_steps_per_second": 0.336,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.06222222222222222,
119
+ "grad_norm": 5.098133563995361,
120
+ "learning_rate": 3.097345132743363e-06,
121
+ "logits/chosen": -0.5378152132034302,
122
+ "logits/rejected": -0.5494933724403381,
123
+ "logps/chosen": -1.5099802017211914,
124
+ "logps/rejected": -21.206321716308594,
125
+ "loss": 0.2931,
126
+ "rewards/accuracies": 0.9375,
127
+ "rewards/chosen": 17.083791732788086,
128
+ "rewards/margins": 1.5844331979751587,
129
+ "rewards/rejected": 15.499359130859375,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.07111111111111111,
134
+ "grad_norm": 29.787437438964844,
135
+ "learning_rate": 3.539823008849558e-06,
136
+ "logits/chosen": -0.443774938583374,
137
+ "logits/rejected": -0.45571577548980713,
138
+ "logps/chosen": -1.5804342031478882,
139
+ "logps/rejected": -22.606929779052734,
140
+ "loss": 0.202,
141
+ "rewards/accuracies": 0.9750000238418579,
142
+ "rewards/chosen": 17.302125930786133,
143
+ "rewards/margins": 2.174014091491699,
144
+ "rewards/rejected": 15.128110885620117,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 0.08,
149
+ "grad_norm": 23.14398193359375,
150
+ "learning_rate": 3.982300884955752e-06,
151
+ "logits/chosen": -0.3626072406768799,
152
+ "logits/rejected": -0.3787815570831299,
153
+ "logps/chosen": -2.203828811645508,
154
+ "logps/rejected": -29.433551788330078,
155
+ "loss": 0.2123,
156
+ "rewards/accuracies": 0.925000011920929,
157
+ "rewards/chosen": 17.00284194946289,
158
+ "rewards/margins": 2.320391893386841,
159
+ "rewards/rejected": 14.682450294494629,
160
+ "step": 90
161
+ },
162
+ {
163
+ "epoch": 0.08888888888888889,
164
+ "grad_norm": 29.672739028930664,
165
+ "learning_rate": 4.424778761061948e-06,
166
+ "logits/chosen": -0.3035663962364197,
167
+ "logits/rejected": -0.31762221455574036,
168
+ "logps/chosen": -3.433589458465576,
169
+ "logps/rejected": -29.9322509765625,
170
+ "loss": 0.2592,
171
+ "rewards/accuracies": 0.9375,
172
+ "rewards/chosen": 16.929956436157227,
173
+ "rewards/margins": 2.31272029876709,
174
+ "rewards/rejected": 14.617237091064453,
175
+ "step": 100
176
+ },
177
+ {
178
+ "epoch": 0.09777777777777778,
179
+ "grad_norm": 1.873722791671753,
180
+ "learning_rate": 4.867256637168142e-06,
181
+ "logits/chosen": -0.2679600715637207,
182
+ "logits/rejected": -0.2826440930366516,
183
+ "logps/chosen": -0.9653514623641968,
184
+ "logps/rejected": -30.235322952270508,
185
+ "loss": 0.1336,
186
+ "rewards/accuracies": 0.949999988079071,
187
+ "rewards/chosen": 17.462385177612305,
188
+ "rewards/margins": 3.1994175910949707,
189
+ "rewards/rejected": 14.26296615600586,
190
+ "step": 110
191
+ },
192
+ {
193
+ "epoch": 0.10666666666666667,
194
+ "grad_norm": 1.6913721561431885,
195
+ "learning_rate": 4.999409761242696e-06,
196
+ "logits/chosen": -0.22222033143043518,
197
+ "logits/rejected": -0.23720571398735046,
198
+ "logps/chosen": -4.4953508377075195,
199
+ "logps/rejected": -34.074745178222656,
200
+ "loss": 0.2552,
201
+ "rewards/accuracies": 0.8999999761581421,
202
+ "rewards/chosen": 17.04866600036621,
203
+ "rewards/margins": 3.1014418601989746,
204
+ "rewards/rejected": 13.947224617004395,
205
+ "step": 120
206
+ },
207
+ {
208
+ "epoch": 0.10666666666666667,
209
+ "eval_logits/chosen": -0.206527978181839,
210
+ "eval_logits/rejected": -0.22178640961647034,
211
+ "eval_logps/chosen": -3.69442081451416,
212
+ "eval_logps/rejected": -36.072166442871094,
213
+ "eval_loss": 0.18996010720729828,
214
+ "eval_rewards/accuracies": 0.9320000410079956,
215
+ "eval_rewards/chosen": 17.133451461791992,
216
+ "eval_rewards/margins": 3.379946708679199,
217
+ "eval_rewards/rejected": 13.753504753112793,
218
+ "eval_runtime": 361.5279,
219
+ "eval_samples_per_second": 2.766,
220
+ "eval_steps_per_second": 0.346,
221
+ "step": 120
222
+ },
223
+ {
224
+ "epoch": 0.11555555555555555,
225
+ "grad_norm": 61.80262756347656,
226
+ "learning_rate": 4.996519466816778e-06,
227
+ "logits/chosen": -0.18473535776138306,
228
+ "logits/rejected": -0.1988501250743866,
229
+ "logps/chosen": -3.7009687423706055,
230
+ "logps/rejected": -39.289939880371094,
231
+ "loss": 0.1394,
232
+ "rewards/accuracies": 0.9624999761581421,
233
+ "rewards/chosen": 17.106964111328125,
234
+ "rewards/margins": 3.633338212966919,
235
+ "rewards/rejected": 13.473625183105469,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 0.12444444444444444,
240
+ "grad_norm": 1.6732702255249023,
241
+ "learning_rate": 4.9912234871722805e-06,
242
+ "logits/chosen": -0.16134041547775269,
243
+ "logits/rejected": -0.17547868192195892,
244
+ "logps/chosen": -3.0637736320495605,
245
+ "logps/rejected": -40.07548522949219,
246
+ "loss": 0.1408,
247
+ "rewards/accuracies": 0.9750000238418579,
248
+ "rewards/chosen": 17.392223358154297,
249
+ "rewards/margins": 4.242353439331055,
250
+ "rewards/rejected": 13.149867057800293,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.13333333333333333,
255
+ "grad_norm": 0.346453994512558,
256
+ "learning_rate": 4.98352692559805e-06,
257
+ "logits/chosen": -0.13797929883003235,
258
+ "logits/rejected": -0.15283086895942688,
259
+ "logps/chosen": -5.14492130279541,
260
+ "logps/rejected": -47.97212219238281,
261
+ "loss": 0.2153,
262
+ "rewards/accuracies": 0.9375,
263
+ "rewards/chosen": 16.896778106689453,
264
+ "rewards/margins": 4.227695465087891,
265
+ "rewards/rejected": 12.669081687927246,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.14222222222222222,
270
+ "grad_norm": 0.21871662139892578,
271
+ "learning_rate": 4.973437198621237e-06,
272
+ "logits/chosen": -0.12396670132875443,
273
+ "logits/rejected": -0.13780555129051208,
274
+ "logps/chosen": -6.108860015869141,
275
+ "logps/rejected": -54.90739440917969,
276
+ "loss": 0.0388,
277
+ "rewards/accuracies": 0.9750000238418579,
278
+ "rewards/chosen": 16.75935935974121,
279
+ "rewards/margins": 4.755282878875732,
280
+ "rewards/rejected": 12.004077911376953,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 0.1511111111111111,
285
+ "grad_norm": 235.12429809570312,
286
+ "learning_rate": 4.960964028860621e-06,
287
+ "logits/chosen": -0.1140839159488678,
288
+ "logits/rejected": -0.1263057291507721,
289
+ "logps/chosen": -12.605452537536621,
290
+ "logps/rejected": -53.81230926513672,
291
+ "loss": 0.4651,
292
+ "rewards/accuracies": 0.875,
293
+ "rewards/chosen": 16.101238250732422,
294
+ "rewards/margins": 3.9864249229431152,
295
+ "rewards/rejected": 12.114812850952148,
296
+ "step": 170
297
+ },
298
+ {
299
+ "epoch": 0.16,
300
+ "grad_norm": 190.97048950195312,
301
+ "learning_rate": 4.946119435657738e-06,
302
+ "logits/chosen": -0.10746976733207703,
303
+ "logits/rejected": -0.11878640949726105,
304
+ "logps/chosen": -8.5105562210083,
305
+ "logps/rejected": -51.314781188964844,
306
+ "loss": 0.2362,
307
+ "rewards/accuracies": 0.925000011920929,
308
+ "rewards/chosen": 16.719980239868164,
309
+ "rewards/margins": 4.549674034118652,
310
+ "rewards/rejected": 12.170306205749512,
311
+ "step": 180
312
+ },
313
+ {
314
+ "epoch": 0.16,
315
+ "eval_logits/chosen": -0.10870806127786636,
316
+ "eval_logits/rejected": -0.12223993986845016,
317
+ "eval_logps/chosen": -4.414996147155762,
318
+ "eval_logps/rejected": -53.885032653808594,
319
+ "eval_loss": 0.20236633718013763,
320
+ "eval_rewards/accuracies": 0.9510000944137573,
321
+ "eval_rewards/chosen": 17.06139373779297,
322
+ "eval_rewards/margins": 5.089176177978516,
323
+ "eval_rewards/rejected": 11.97221851348877,
324
+ "eval_runtime": 361.4355,
325
+ "eval_samples_per_second": 2.767,
326
+ "eval_steps_per_second": 0.346,
327
+ "step": 180
328
+ },
329
+ {
330
+ "epoch": 0.1688888888888889,
331
+ "grad_norm": 56.81266784667969,
332
+ "learning_rate": 4.928917723494854e-06,
333
+ "logits/chosen": -0.10682469606399536,
334
+ "logits/rejected": -0.12124393880367279,
335
+ "logps/chosen": -3.058413028717041,
336
+ "logps/rejected": -55.052528381347656,
337
+ "loss": 0.2442,
338
+ "rewards/accuracies": 0.9500000476837158,
339
+ "rewards/chosen": 17.058589935302734,
340
+ "rewards/margins": 5.056097984313965,
341
+ "rewards/rejected": 12.002490043640137,
342
+ "step": 190
343
+ },
344
+ {
345
+ "epoch": 0.17777777777777778,
346
+ "grad_norm": 175.06552124023438,
347
+ "learning_rate": 4.909375468210947e-06,
348
+ "logits/chosen": -0.10520349442958832,
349
+ "logits/rejected": -0.12018950283527374,
350
+ "logps/chosen": -4.114959716796875,
351
+ "logps/rejected": -55.9394645690918,
352
+ "loss": 0.1915,
353
+ "rewards/accuracies": 0.9500000476837158,
354
+ "rewards/chosen": 16.98603057861328,
355
+ "rewards/margins": 5.105838775634766,
356
+ "rewards/rejected": 11.880191802978516,
357
+ "step": 200
358
+ },
359
+ {
360
+ "epoch": 0.18666666666666668,
361
+ "grad_norm": 78.06558990478516,
362
+ "learning_rate": 4.8875115010289655e-06,
363
+ "logits/chosen": -0.10475558042526245,
364
+ "logits/rejected": -0.11949175596237183,
365
+ "logps/chosen": -6.760301113128662,
366
+ "logps/rejected": -53.91607666015625,
367
+ "loss": 0.2843,
368
+ "rewards/accuracies": 0.9375,
369
+ "rewards/chosen": 16.857545852661133,
370
+ "rewards/margins": 4.917357921600342,
371
+ "rewards/rejected": 11.94018840789795,
372
+ "step": 210
373
+ },
374
+ {
375
+ "epoch": 0.19555555555555557,
376
+ "grad_norm": 15.880486488342285,
377
+ "learning_rate": 4.863346890409768e-06,
378
+ "logits/chosen": -0.11213523149490356,
379
+ "logits/rejected": -0.12581588327884674,
380
+ "logps/chosen": -6.759585380554199,
381
+ "logps/rejected": -51.10936737060547,
382
+ "loss": 0.5104,
383
+ "rewards/accuracies": 0.875,
384
+ "rewards/chosen": 16.859071731567383,
385
+ "rewards/margins": 4.638372898101807,
386
+ "rewards/rejected": 12.220698356628418,
387
+ "step": 220
388
+ },
389
+ {
390
+ "epoch": 0.20444444444444446,
391
+ "grad_norm": 46.97845458984375,
392
+ "learning_rate": 4.836904921750224e-06,
393
+ "logits/chosen": -0.11947059631347656,
394
+ "logits/rejected": -0.1329912692308426,
395
+ "logps/chosen": -3.608184814453125,
396
+ "logps/rejected": -48.794761657714844,
397
+ "loss": 0.2134,
398
+ "rewards/accuracies": 0.925000011920929,
399
+ "rewards/chosen": 17.235904693603516,
400
+ "rewards/margins": 4.859888076782227,
401
+ "rewards/rejected": 12.376014709472656,
402
+ "step": 230
403
+ },
404
+ {
405
+ "epoch": 0.21333333333333335,
406
+ "grad_norm": 24.032859802246094,
407
+ "learning_rate": 4.808211074945042e-06,
408
+ "logits/chosen": -0.1200513243675232,
409
+ "logits/rejected": -0.1333036869764328,
410
+ "logps/chosen": -3.7552154064178467,
411
+ "logps/rejected": -49.87453079223633,
412
+ "loss": 0.1781,
413
+ "rewards/accuracies": 0.9500000476837158,
414
+ "rewards/chosen": 17.094650268554688,
415
+ "rewards/margins": 4.68077278137207,
416
+ "rewards/rejected": 12.41387939453125,
417
+ "step": 240
418
+ },
419
+ {
420
+ "epoch": 0.21333333333333335,
421
+ "eval_logits/chosen": -0.12433278560638428,
422
+ "eval_logits/rejected": -0.13808581233024597,
423
+ "eval_logps/chosen": -4.408891201019287,
424
+ "eval_logps/rejected": -50.744781494140625,
425
+ "eval_loss": 0.1546352356672287,
426
+ "eval_rewards/accuracies": 0.9500000476837158,
427
+ "eval_rewards/chosen": 17.06200408935547,
428
+ "eval_rewards/margins": 4.775761604309082,
429
+ "eval_rewards/rejected": 12.286243438720703,
430
+ "eval_runtime": 361.4974,
431
+ "eval_samples_per_second": 2.766,
432
+ "eval_steps_per_second": 0.346,
433
+ "step": 240
434
+ },
435
+ {
436
+ "epoch": 0.2222222222222222,
437
+ "grad_norm": 0.25737640261650085,
438
+ "learning_rate": 4.7772929998339485e-06,
439
+ "logits/chosen": -0.12348780035972595,
440
+ "logits/rejected": -0.13704943656921387,
441
+ "logps/chosen": -4.4299187660217285,
442
+ "logps/rejected": -53.074607849121094,
443
+ "loss": 0.1373,
444
+ "rewards/accuracies": 0.9375,
445
+ "rewards/chosen": 17.087068557739258,
446
+ "rewards/margins": 5.06691837310791,
447
+ "rewards/rejected": 12.020149230957031,
448
+ "step": 250
449
+ },
450
+ {
451
+ "epoch": 0.2311111111111111,
452
+ "grad_norm": 0.1839389204978943,
453
+ "learning_rate": 4.744180489557859e-06,
454
+ "logits/chosen": -0.12177034467458725,
455
+ "logits/rejected": -0.1342695653438568,
456
+ "logps/chosen": -3.775188446044922,
457
+ "logps/rejected": -53.98720932006836,
458
+ "loss": 0.1896,
459
+ "rewards/accuracies": 0.949999988079071,
460
+ "rewards/chosen": 17.12021255493164,
461
+ "rewards/margins": 5.148064613342285,
462
+ "rewards/rejected": 11.972146987915039,
463
+ "step": 260
464
+ },
465
+ {
466
+ "epoch": 0.24,
467
+ "grad_norm": 12.258485794067383,
468
+ "learning_rate": 4.708905451849754e-06,
469
+ "logits/chosen": -0.11067859083414078,
470
+ "logits/rejected": -0.12377731502056122,
471
+ "logps/chosen": -6.418317794799805,
472
+ "logps/rejected": -56.57402801513672,
473
+ "loss": 0.2315,
474
+ "rewards/accuracies": 0.9375,
475
+ "rewards/chosen": 16.738832473754883,
476
+ "rewards/margins": 4.884931564331055,
477
+ "rewards/rejected": 11.853900909423828,
478
+ "step": 270
479
+ },
480
+ {
481
+ "epoch": 0.24888888888888888,
482
+ "grad_norm": 77.56194305419922,
483
+ "learning_rate": 4.671501878287879e-06,
484
+ "logits/chosen": -0.1184445172548294,
485
+ "logits/rejected": -0.1339874565601349,
486
+ "logps/chosen": -10.12116527557373,
487
+ "logps/rejected": -53.403907775878906,
488
+ "loss": 0.5343,
489
+ "rewards/accuracies": 0.862500011920929,
490
+ "rewards/chosen": 16.458633422851562,
491
+ "rewards/margins": 4.402472496032715,
492
+ "rewards/rejected": 12.056160926818848,
493
+ "step": 280
494
+ },
495
+ {
496
+ "epoch": 0.2577777777777778,
497
+ "grad_norm": 67.53883361816406,
498
+ "learning_rate": 4.6320058115409295e-06,
499
+ "logits/chosen": -0.1448262631893158,
500
+ "logits/rejected": -0.15793387591838837,
501
+ "logps/chosen": -3.4666190147399902,
502
+ "logps/rejected": -48.79213333129883,
503
+ "loss": 0.5017,
504
+ "rewards/accuracies": 0.887499988079071,
505
+ "rewards/chosen": 16.945899963378906,
506
+ "rewards/margins": 4.2686333656311035,
507
+ "rewards/rejected": 12.677268028259277,
508
+ "step": 290
509
+ },
510
+ {
511
+ "epoch": 0.26666666666666666,
512
+ "grad_norm": 0.17521341145038605,
513
+ "learning_rate": 4.590455310636778e-06,
514
+ "logits/chosen": -0.16128253936767578,
515
+ "logits/rejected": -0.17375555634498596,
516
+ "logps/chosen": -2.9032950401306152,
517
+ "logps/rejected": -47.69734191894531,
518
+ "loss": 0.265,
519
+ "rewards/accuracies": 0.925000011920929,
520
+ "rewards/chosen": 17.18383026123047,
521
+ "rewards/margins": 4.541309356689453,
522
+ "rewards/rejected": 12.642518997192383,
523
+ "step": 300
524
+ },
525
+ {
526
+ "epoch": 0.26666666666666666,
527
+ "eval_logits/chosen": -0.17444846034049988,
528
+ "eval_logits/rejected": -0.18559777736663818,
529
+ "eval_logps/chosen": -2.535512924194336,
530
+ "eval_logps/rejected": -47.16367721557617,
531
+ "eval_loss": 0.15360687673091888,
532
+ "eval_rewards/accuracies": 0.9440000653266907,
533
+ "eval_rewards/chosen": 17.249343872070312,
534
+ "eval_rewards/margins": 4.604989051818848,
535
+ "eval_rewards/rejected": 12.644353866577148,
536
+ "eval_runtime": 361.4575,
537
+ "eval_samples_per_second": 2.767,
538
+ "eval_steps_per_second": 0.346,
539
+ "step": 300
540
+ },
541
+ {
542
+ "epoch": 0.27555555555555555,
543
+ "grad_norm": 0.5040452480316162,
544
+ "learning_rate": 4.54689041428819e-06,
545
+ "logits/chosen": -0.16974106431007385,
546
+ "logits/rejected": -0.1810058057308197,
547
+ "logps/chosen": -1.233938217163086,
548
+ "logps/rejected": -49.907745361328125,
549
+ "loss": 0.1132,
550
+ "rewards/accuracies": 0.9500000476837158,
551
+ "rewards/chosen": 17.34117889404297,
552
+ "rewards/margins": 4.934173583984375,
553
+ "rewards/rejected": 12.407005310058594,
554
+ "step": 310
555
+ },
556
+ {
557
+ "epoch": 0.28444444444444444,
558
+ "grad_norm": 100.02949523925781,
559
+ "learning_rate": 4.501353102310901e-06,
560
+ "logits/chosen": -0.15705889463424683,
561
+ "logits/rejected": -0.1695334017276764,
562
+ "logps/chosen": -1.0820492506027222,
563
+ "logps/rejected": -52.577110290527344,
564
+ "loss": 0.1194,
565
+ "rewards/accuracies": 0.9500000476837158,
566
+ "rewards/chosen": 17.33388900756836,
567
+ "rewards/margins": 5.154760837554932,
568
+ "rewards/rejected": 12.179126739501953,
569
+ "step": 320
570
+ },
571
+ {
572
+ "epoch": 0.29333333333333333,
573
+ "grad_norm": 0.2689219117164612,
574
+ "learning_rate": 4.453887255171206e-06,
575
+ "logits/chosen": -0.13849371671676636,
576
+ "logits/rejected": -0.14990833401679993,
577
+ "logps/chosen": -1.8435032367706299,
578
+ "logps/rejected": -54.79044723510742,
579
+ "loss": 0.0926,
580
+ "rewards/accuracies": 0.9500000476837158,
581
+ "rewards/chosen": 17.2423095703125,
582
+ "rewards/margins": 5.28987979888916,
583
+ "rewards/rejected": 11.952428817749023,
584
+ "step": 330
585
+ },
586
+ {
587
+ "epoch": 0.3022222222222222,
588
+ "grad_norm": 0.09305431693792343,
589
+ "learning_rate": 4.404538611702055e-06,
590
+ "logits/chosen": -0.12299702316522598,
591
+ "logits/rejected": -0.13453055918216705,
592
+ "logps/chosen": -2.9897143840789795,
593
+ "logps/rejected": -52.954498291015625,
594
+ "loss": 0.2873,
595
+ "rewards/accuracies": 0.925000011920929,
596
+ "rewards/chosen": 17.17474365234375,
597
+ "rewards/margins": 5.071004867553711,
598
+ "rewards/rejected": 12.103739738464355,
599
+ "step": 340
600
+ },
601
+ {
602
+ "epoch": 0.3111111111111111,
603
+ "grad_norm": 59.282073974609375,
604
+ "learning_rate": 4.3533547250284015e-06,
605
+ "logits/chosen": -0.11913029849529266,
606
+ "logits/rejected": -0.12785324454307556,
607
+ "logps/chosen": -3.9456872940063477,
608
+ "logps/rejected": -48.68487548828125,
609
+ "loss": 0.4332,
610
+ "rewards/accuracies": 0.875,
611
+ "rewards/chosen": 17.12805938720703,
612
+ "rewards/margins": 4.669450283050537,
613
+ "rewards/rejected": 12.458610534667969,
614
+ "step": 350
615
+ },
616
+ {
617
+ "epoch": 0.32,
618
+ "grad_norm": 0.31101909279823303,
619
+ "learning_rate": 4.300384916744261e-06,
620
+ "logits/chosen": -0.11280188709497452,
621
+ "logits/rejected": -0.12300585210323334,
622
+ "logps/chosen": -2.1714723110198975,
623
+ "logps/rejected": -54.74174118041992,
624
+ "loss": 0.1605,
625
+ "rewards/accuracies": 0.9500000476837158,
626
+ "rewards/chosen": 17.326162338256836,
627
+ "rewards/margins": 5.467062473297119,
628
+ "rewards/rejected": 11.859098434448242,
629
+ "step": 360
630
+ },
631
+ {
632
+ "epoch": 0.32,
633
+ "eval_logits/chosen": -0.10620756447315216,
634
+ "eval_logits/rejected": -0.11727114766836166,
635
+ "eval_logps/chosen": -1.4165427684783936,
636
+ "eval_logps/rejected": -50.9525146484375,
637
+ "eval_loss": 0.3194349706172943,
638
+ "eval_rewards/accuracies": 0.9210000038146973,
639
+ "eval_rewards/chosen": 17.36124038696289,
640
+ "eval_rewards/margins": 5.095769882202148,
641
+ "eval_rewards/rejected": 12.26546859741211,
642
+ "eval_runtime": 361.5072,
643
+ "eval_samples_per_second": 2.766,
644
+ "eval_steps_per_second": 0.346,
645
+ "step": 360
646
+ },
647
+ {
648
+ "epoch": 0.3288888888888889,
649
+ "grad_norm": 6.1126532554626465,
650
+ "learning_rate": 4.24568022938566e-06,
651
+ "logits/chosen": -0.10354311764240265,
652
+ "logits/rejected": -0.11526636779308319,
653
+ "logps/chosen": -1.2935255765914917,
654
+ "logps/rejected": -55.57566833496094,
655
+ "loss": 0.1711,
656
+ "rewards/accuracies": 0.9500000476837158,
657
+ "rewards/chosen": 17.439346313476562,
658
+ "rewards/margins": 5.700921058654785,
659
+ "rewards/rejected": 11.738424301147461,
660
+ "step": 370
661
+ },
662
+ {
663
+ "epoch": 0.3377777777777778,
664
+ "grad_norm": 34.15927505493164,
665
+ "learning_rate": 4.189293377245241e-06,
666
+ "logits/chosen": -0.1029932051897049,
667
+ "logits/rejected": -0.11382515728473663,
668
+ "logps/chosen": -2.5132687091827393,
669
+ "logps/rejected": -55.50346374511719,
670
+ "loss": 0.4359,
671
+ "rewards/accuracies": 0.8875000476837158,
672
+ "rewards/chosen": 16.731037139892578,
673
+ "rewards/margins": 4.368172645568848,
674
+ "rewards/rejected": 12.362865447998047,
675
+ "step": 380
676
+ },
677
+ {
678
+ "epoch": 0.3466666666666667,
679
+ "grad_norm": 2.8422904014587402,
680
+ "learning_rate": 4.131278695575952e-06,
681
+ "logits/chosen": -0.10793520510196686,
682
+ "logits/rejected": -0.12109285593032837,
683
+ "logps/chosen": -3.014652729034424,
684
+ "logps/rejected": -53.98411560058594,
685
+ "loss": 0.2161,
686
+ "rewards/accuracies": 0.949999988079071,
687
+ "rewards/chosen": 17.137393951416016,
688
+ "rewards/margins": 5.105995178222656,
689
+ "rewards/rejected": 12.03139877319336,
690
+ "step": 390
691
+ },
692
+ {
693
+ "epoch": 0.35555555555555557,
694
+ "grad_norm": 54.0329475402832,
695
+ "learning_rate": 4.071692088232743e-06,
696
+ "logits/chosen": -0.10393750667572021,
697
+ "logits/rejected": -0.11834606528282166,
698
+ "logps/chosen": -2.1508543491363525,
699
+ "logps/rejected": -45.60733413696289,
700
+ "loss": 0.2077,
701
+ "rewards/accuracies": 0.925000011920929,
702
+ "rewards/chosen": 17.586124420166016,
703
+ "rewards/margins": 5.077212333679199,
704
+ "rewards/rejected": 12.5089111328125,
705
+ "step": 400
706
+ },
707
+ {
708
+ "epoch": 0.36444444444444446,
709
+ "grad_norm": 81.61144256591797,
710
+ "learning_rate": 4.010590973802737e-06,
711
+ "logits/chosen": -0.09564584493637085,
712
+ "logits/rejected": -0.10617707669734955,
713
+ "logps/chosen": -3.4572842121124268,
714
+ "logps/rejected": -50.92162322998047,
715
+ "loss": 0.2478,
716
+ "rewards/accuracies": 0.8875000476837158,
717
+ "rewards/chosen": 17.010910034179688,
718
+ "rewards/margins": 4.556198596954346,
719
+ "rewards/rejected": 12.454713821411133,
720
+ "step": 410
721
+ },
722
+ {
723
+ "epoch": 0.37333333333333335,
724
+ "grad_norm": 0.30974289774894714,
725
+ "learning_rate": 3.948034230275781e-06,
726
+ "logits/chosen": -0.09134417027235031,
727
+ "logits/rejected": -0.1020016297698021,
728
+ "logps/chosen": -5.046698570251465,
729
+ "logps/rejected": -48.908958435058594,
730
+ "loss": 0.2894,
731
+ "rewards/accuracies": 0.8999999761581421,
732
+ "rewards/chosen": 17.007888793945312,
733
+ "rewards/margins": 4.53641414642334,
734
+ "rewards/rejected": 12.471475601196289,
735
+ "step": 420
736
+ },
737
+ {
738
+ "epoch": 0.37333333333333335,
739
+ "eval_logits/chosen": -0.09054450690746307,
740
+ "eval_logits/rejected": -0.10264354199171066,
741
+ "eval_logps/chosen": -1.913105845451355,
742
+ "eval_logps/rejected": -51.11127471923828,
743
+ "eval_loss": 0.16789735853672028,
744
+ "eval_rewards/accuracies": 0.9450000524520874,
745
+ "eval_rewards/chosen": 17.311582565307617,
746
+ "eval_rewards/margins": 5.061989784240723,
747
+ "eval_rewards/rejected": 12.249593734741211,
748
+ "eval_runtime": 361.5337,
749
+ "eval_samples_per_second": 2.766,
750
+ "eval_steps_per_second": 0.346,
751
+ "step": 420
752
+ },
753
+ {
754
+ "epoch": 0.38222222222222224,
755
+ "grad_norm": 12.824393272399902,
756
+ "learning_rate": 3.884082138308699e-06,
757
+ "logits/chosen": -0.08666776865720749,
758
+ "logits/rejected": -0.0997733399271965,
759
+ "logps/chosen": -1.7306327819824219,
760
+ "logps/rejected": -54.273292541503906,
761
+ "loss": 0.2298,
762
+ "rewards/accuracies": 0.9500000476837158,
763
+ "rewards/chosen": 17.167621612548828,
764
+ "rewards/margins": 5.065673351287842,
765
+ "rewards/rejected": 12.101947784423828,
766
+ "step": 430
767
+ },
768
+ {
769
+ "epoch": 0.39111111111111113,
770
+ "grad_norm": 0.30713599920272827,
771
+ "learning_rate": 3.818796323137896e-06,
772
+ "logits/chosen": -0.09174907952547073,
773
+ "logits/rejected": -0.10376611351966858,
774
+ "logps/chosen": -1.489154577255249,
775
+ "logps/rejected": -54.580726623535156,
776
+ "loss": 0.2513,
777
+ "rewards/accuracies": 0.9375,
778
+ "rewards/chosen": 17.22280502319336,
779
+ "rewards/margins": 5.175349235534668,
780
+ "rewards/rejected": 12.047454833984375,
781
+ "step": 440
782
+ },
783
+ {
784
+ "epoch": 0.4,
785
+ "grad_norm": 87.4791488647461,
786
+ "learning_rate": 3.7522396951963303e-06,
787
+ "logits/chosen": -0.09688778221607208,
788
+ "logits/rejected": -0.10897806286811829,
789
+ "logps/chosen": -3.157695770263672,
790
+ "logps/rejected": -50.96417236328125,
791
+ "loss": 0.1758,
792
+ "rewards/accuracies": 0.9500000476837158,
793
+ "rewards/chosen": 17.345651626586914,
794
+ "rewards/margins": 5.245656967163086,
795
+ "rewards/rejected": 12.099993705749512,
796
+ "step": 450
797
+ },
798
+ {
799
+ "epoch": 0.4088888888888889,
800
+ "grad_norm": 146.2008056640625,
801
+ "learning_rate": 3.684476389492026e-06,
802
+ "logits/chosen": -0.09378582239151001,
803
+ "logits/rejected": -0.10475654900074005,
804
+ "logps/chosen": -0.5611928701400757,
805
+ "logps/rejected": -56.518890380859375,
806
+ "loss": 0.1981,
807
+ "rewards/accuracies": 0.9500000476837158,
808
+ "rewards/chosen": 17.113712310791016,
809
+ "rewards/margins": 5.068872928619385,
810
+ "rewards/rejected": 12.044839859008789,
811
+ "step": 460
812
+ },
813
+ {
814
+ "epoch": 0.4177777777777778,
815
+ "grad_norm": 1.9137721061706543,
816
+ "learning_rate": 3.6155717038065783e-06,
817
+ "logits/chosen": -0.08695463836193085,
818
+ "logits/rejected": -0.09596743434667587,
819
+ "logps/chosen": -1.5298550128936768,
820
+ "logps/rejected": -50.27445983886719,
821
+ "loss": 0.2066,
822
+ "rewards/accuracies": 0.9375,
823
+ "rewards/chosen": 17.35186004638672,
824
+ "rewards/margins": 5.014693260192871,
825
+ "rewards/rejected": 12.337167739868164,
826
+ "step": 470
827
+ },
828
+ {
829
+ "epoch": 0.4266666666666667,
830
+ "grad_norm": 84.80391693115234,
831
+ "learning_rate": 3.545592035773192e-06,
832
+ "logits/chosen": -0.0746893435716629,
833
+ "logits/rejected": -0.08653923869132996,
834
+ "logps/chosen": -2.0052125453948975,
835
+ "logps/rejected": -57.502811431884766,
836
+ "loss": 0.1149,
837
+ "rewards/accuracies": 0.9500000476837158,
838
+ "rewards/chosen": 17.14373016357422,
839
+ "rewards/margins": 5.360415935516357,
840
+ "rewards/rejected": 11.783313751220703,
841
+ "step": 480
842
+ },
843
+ {
844
+ "epoch": 0.4266666666666667,
845
+ "eval_logits/chosen": -0.07700399309396744,
846
+ "eval_logits/rejected": -0.08828537166118622,
847
+ "eval_logps/chosen": -4.48896598815918,
848
+ "eval_logps/rejected": -53.76282501220703,
849
+ "eval_loss": 0.29511645436286926,
850
+ "eval_rewards/accuracies": 0.9230000376701355,
851
+ "eval_rewards/chosen": 17.053997039794922,
852
+ "eval_rewards/margins": 5.069558143615723,
853
+ "eval_rewards/rejected": 11.984437942504883,
854
+ "eval_runtime": 361.5035,
855
+ "eval_samples_per_second": 2.766,
856
+ "eval_steps_per_second": 0.346,
857
+ "step": 480
858
+ },
859
+ {
860
+ "epoch": 0.43555555555555553,
861
+ "grad_norm": 82.9616470336914,
862
+ "learning_rate": 3.4746048188948806e-06,
863
+ "logits/chosen": -0.06675051152706146,
864
+ "logits/rejected": -0.07860895991325378,
865
+ "logps/chosen": -4.162237167358398,
866
+ "logps/rejected": -54.77789306640625,
867
+ "loss": 0.2979,
868
+ "rewards/accuracies": 0.9125000238418579,
869
+ "rewards/chosen": 17.047603607177734,
870
+ "rewards/margins": 5.138361930847168,
871
+ "rewards/rejected": 11.909242630004883,
872
+ "step": 490
873
+ },
874
+ {
875
+ "epoch": 0.4444444444444444,
876
+ "grad_norm": 0.04293210059404373,
877
+ "learning_rate": 3.4026784575644887e-06,
878
+ "logits/chosen": -0.06424491107463837,
879
+ "logits/rejected": -0.07567107677459717,
880
+ "logps/chosen": -2.05729603767395,
881
+ "logps/rejected": -56.646087646484375,
882
+ "loss": 0.4378,
883
+ "rewards/accuracies": 0.8875000476837158,
884
+ "rewards/chosen": 16.947803497314453,
885
+ "rewards/margins": 4.919981956481934,
886
+ "rewards/rejected": 12.02782154083252,
887
+ "step": 500
888
+ },
889
+ {
890
+ "epoch": 0.4533333333333333,
891
+ "grad_norm": 0.07136644423007965,
892
+ "learning_rate": 3.329882261149148e-06,
893
+ "logits/chosen": -0.06423303484916687,
894
+ "logits/rejected": -0.07512776553630829,
895
+ "logps/chosen": -3.1519265174865723,
896
+ "logps/rejected": -53.53008270263672,
897
+ "loss": 0.2613,
898
+ "rewards/accuracies": 0.9375,
899
+ "rewards/chosen": 17.112144470214844,
900
+ "rewards/margins": 5.046430587768555,
901
+ "rewards/rejected": 12.065712928771973,
902
+ "step": 510
903
+ },
904
+ {
905
+ "epoch": 0.4622222222222222,
906
+ "grad_norm": 0.29279613494873047,
907
+ "learning_rate": 3.25628637720269e-06,
908
+ "logits/chosen": -0.060233693569898605,
909
+ "logits/rejected": -0.07076811790466309,
910
+ "logps/chosen": -1.2358124256134033,
911
+ "logps/rejected": -50.61806106567383,
912
+ "loss": 0.1517,
913
+ "rewards/accuracies": 0.9375,
914
+ "rewards/chosen": 17.457351684570312,
915
+ "rewards/margins": 5.2062835693359375,
916
+ "rewards/rejected": 12.251070022583008,
917
+ "step": 520
918
+ },
919
+ {
920
+ "epoch": 0.4711111111111111,
921
+ "grad_norm": 0.7946074604988098,
922
+ "learning_rate": 3.181961723870359e-06,
923
+ "logits/chosen": -0.054482050240039825,
924
+ "logits/rejected": -0.0657092034816742,
925
+ "logps/chosen": -0.7682158946990967,
926
+ "logps/rejected": -58.81409454345703,
927
+ "loss": 0.2304,
928
+ "rewards/accuracies": 0.9375,
929
+ "rewards/chosen": 17.075973510742188,
930
+ "rewards/margins": 5.222441673278809,
931
+ "rewards/rejected": 11.853530883789062,
932
+ "step": 530
933
+ },
934
+ {
935
+ "epoch": 0.48,
936
+ "grad_norm": 3.1405210494995117,
937
+ "learning_rate": 3.1069799215509847e-06,
938
+ "logits/chosen": -0.05050881579518318,
939
+ "logits/rejected": -0.061149902641773224,
940
+ "logps/chosen": -0.954046368598938,
941
+ "logps/rejected": -52.70227813720703,
942
+ "loss": 0.0384,
943
+ "rewards/accuracies": 0.987500011920929,
944
+ "rewards/chosen": 17.378849029541016,
945
+ "rewards/margins": 5.253483295440674,
946
+ "rewards/rejected": 12.125364303588867,
947
+ "step": 540
948
+ },
949
+ {
950
+ "epoch": 0.48,
951
+ "eval_logits/chosen": -0.05120665580034256,
952
+ "eval_logits/rejected": -0.06123337894678116,
953
+ "eval_logps/chosen": -2.9872913360595703,
954
+ "eval_logps/rejected": -52.27314758300781,
955
+ "eval_loss": 0.17387841641902924,
956
+ "eval_rewards/accuracies": 0.9490000605583191,
957
+ "eval_rewards/chosen": 17.204164505004883,
958
+ "eval_rewards/margins": 5.070757865905762,
959
+ "eval_rewards/rejected": 12.133406639099121,
960
+ "eval_runtime": 361.5449,
961
+ "eval_samples_per_second": 2.766,
962
+ "eval_steps_per_second": 0.346,
963
+ "step": 540
964
+ },
965
+ {
966
+ "epoch": 0.4888888888888889,
967
+ "grad_norm": 0.07270358502864838,
968
+ "learning_rate": 3.0314132238824416e-06,
969
+ "logits/chosen": -0.05125313252210617,
970
+ "logits/rejected": -0.06174170970916748,
971
+ "logps/chosen": -3.2163877487182617,
972
+ "logps/rejected": -47.79279327392578,
973
+ "loss": 0.2087,
974
+ "rewards/accuracies": 0.9125000238418579,
975
+ "rewards/chosen": 17.408517837524414,
976
+ "rewards/margins": 5.061524391174316,
977
+ "rewards/rejected": 12.346992492675781,
978
+ "step": 550
979
+ },
980
+ {
981
+ "epoch": 0.49777777777777776,
982
+ "grad_norm": 0.10005924850702286,
983
+ "learning_rate": 2.955334448116915e-06,
984
+ "logits/chosen": -0.041773442178964615,
985
+ "logits/rejected": -0.05364570394158363,
986
+ "logps/chosen": -0.363404780626297,
987
+ "logps/rejected": -56.32415008544922,
988
+ "loss": 0.0969,
989
+ "rewards/accuracies": 0.9624999761581421,
990
+ "rewards/chosen": 17.3824462890625,
991
+ "rewards/margins": 5.5404510498046875,
992
+ "rewards/rejected": 11.841995239257812,
993
+ "step": 560
994
+ },
995
+ {
996
+ "epoch": 0.5066666666666667,
997
+ "grad_norm": 7.818356990814209,
998
+ "learning_rate": 2.8788169049530533e-06,
999
+ "logits/chosen": -0.04309462010860443,
1000
+ "logits/rejected": -0.05494442582130432,
1001
+ "logps/chosen": -2.2242724895477295,
1002
+ "logps/rejected": -56.444740295410156,
1003
+ "loss": 0.1447,
1004
+ "rewards/accuracies": 0.949999988079071,
1005
+ "rewards/chosen": 17.275390625,
1006
+ "rewards/margins": 5.55007791519165,
1007
+ "rewards/rejected": 11.725313186645508,
1008
+ "step": 570
1009
+ },
1010
+ {
1011
+ "epoch": 0.5155555555555555,
1012
+ "grad_norm": 0.03519747406244278,
1013
+ "learning_rate": 2.8019343278926397e-06,
1014
+ "logits/chosen": -0.03506368771195412,
1015
+ "logits/rejected": -0.046854715794324875,
1016
+ "logps/chosen": -0.5200096964836121,
1017
+ "logps/rejected": -59.05330276489258,
1018
+ "loss": 0.0998,
1019
+ "rewards/accuracies": 0.949999988079071,
1020
+ "rewards/chosen": 17.320554733276367,
1021
+ "rewards/margins": 5.727260589599609,
1022
+ "rewards/rejected": 11.593294143676758,
1023
+ "step": 580
1024
+ },
1025
+ {
1026
+ "epoch": 0.5244444444444445,
1027
+ "grad_norm": 0.04519123584032059,
1028
+ "learning_rate": 2.7247608021898265e-06,
1029
+ "logits/chosen": -0.03204537555575371,
1030
+ "logits/rejected": -0.04383891448378563,
1031
+ "logps/chosen": -1.1271060705184937,
1032
+ "logps/rejected": -59.308895111083984,
1033
+ "loss": 0.1162,
1034
+ "rewards/accuracies": 0.9750000238418579,
1035
+ "rewards/chosen": 17.256351470947266,
1036
+ "rewards/margins": 5.6881890296936035,
1037
+ "rewards/rejected": 11.568161010742188,
1038
+ "step": 590
1039
+ },
1040
+ {
1041
+ "epoch": 0.5333333333333333,
1042
+ "grad_norm": 0.028689857572317123,
1043
+ "learning_rate": 2.647370693461432e-06,
1044
+ "logits/chosen": -0.02834726870059967,
1045
+ "logits/rejected": -0.03827046602964401,
1046
+ "logps/chosen": -5.673943519592285,
1047
+ "logps/rejected": -55.72624588012695,
1048
+ "loss": 0.4008,
1049
+ "rewards/accuracies": 0.8875000476837158,
1050
+ "rewards/chosen": 16.761056900024414,
1051
+ "rewards/margins": 4.804043769836426,
1052
+ "rewards/rejected": 11.957012176513672,
1053
+ "step": 600
1054
+ },
1055
+ {
1056
+ "epoch": 0.5333333333333333,
1057
+ "eval_logits/chosen": -0.035840023308992386,
1058
+ "eval_logits/rejected": -0.04687971621751785,
1059
+ "eval_logps/chosen": -2.1760435104370117,
1060
+ "eval_logps/rejected": -56.62664031982422,
1061
+ "eval_loss": 0.17061151564121246,
1062
+ "eval_rewards/accuracies": 0.9470000267028809,
1063
+ "eval_rewards/chosen": 17.285289764404297,
1064
+ "eval_rewards/margins": 5.587231636047363,
1065
+ "eval_rewards/rejected": 11.698057174682617,
1066
+ "eval_runtime": 361.5056,
1067
+ "eval_samples_per_second": 2.766,
1068
+ "eval_steps_per_second": 0.346,
1069
+ "step": 600
1070
+ },
1071
+ {
1072
+ "epoch": 0.5422222222222223,
1073
+ "grad_norm": 1.1994622945785522,
1074
+ "learning_rate": 2.569838576027068e-06,
1075
+ "logits/chosen": -0.03231767192482948,
1076
+ "logits/rejected": -0.04397805407643318,
1077
+ "logps/chosen": -1.5904741287231445,
1078
+ "logps/rejected": -62.10520553588867,
1079
+ "loss": 0.1265,
1080
+ "rewards/accuracies": 0.9624999761581421,
1081
+ "rewards/chosen": 17.11206817626953,
1082
+ "rewards/margins": 5.734784126281738,
1083
+ "rewards/rejected": 11.37728500366211,
1084
+ "step": 610
1085
+ },
1086
+ {
1087
+ "epoch": 0.5511111111111111,
1088
+ "grad_norm": 0.14163845777511597,
1089
+ "learning_rate": 2.4922391610481544e-06,
1090
+ "logits/chosen": -0.03293662518262863,
1091
+ "logits/rejected": -0.04335154965519905,
1092
+ "logps/chosen": -1.3568997383117676,
1093
+ "logps/rejected": -58.347434997558594,
1094
+ "loss": 0.097,
1095
+ "rewards/accuracies": 0.9624999761581421,
1096
+ "rewards/chosen": 17.345399856567383,
1097
+ "rewards/margins": 5.823373794555664,
1098
+ "rewards/rejected": 11.522026062011719,
1099
+ "step": 620
1100
+ },
1101
+ {
1102
+ "epoch": 0.56,
1103
+ "grad_norm": 1.125027060508728,
1104
+ "learning_rate": 2.4146472245350804e-06,
1105
+ "logits/chosen": -0.02864963933825493,
1106
+ "logits/rejected": -0.03835710883140564,
1107
+ "logps/chosen": -2.494175434112549,
1108
+ "logps/rejected": -55.33067321777344,
1109
+ "loss": 0.3115,
1110
+ "rewards/accuracies": 0.925000011920929,
1111
+ "rewards/chosen": 17.262205123901367,
1112
+ "rewards/margins": 5.4287261962890625,
1113
+ "rewards/rejected": 11.833479881286621,
1114
+ "step": 630
1115
+ },
1116
+ {
1117
+ "epoch": 0.5688888888888889,
1118
+ "grad_norm": 0.03514016419649124,
1119
+ "learning_rate": 2.337137535291868e-06,
1120
+ "logits/chosen": -0.02757749892771244,
1121
+ "logits/rejected": -0.0376611053943634,
1122
+ "logps/chosen": -2.3163387775421143,
1123
+ "logps/rejected": -53.54579162597656,
1124
+ "loss": 0.1268,
1125
+ "rewards/accuracies": 0.9500000476837158,
1126
+ "rewards/chosen": 17.542863845825195,
1127
+ "rewards/margins": 5.799897193908691,
1128
+ "rewards/rejected": 11.742965698242188,
1129
+ "step": 640
1130
+ },
1131
+ {
1132
+ "epoch": 0.5777777777777777,
1133
+ "grad_norm": 34.20791244506836,
1134
+ "learning_rate": 2.259784782867782e-06,
1135
+ "logits/chosen": -0.02032250165939331,
1136
+ "logits/rejected": -0.031542714685201645,
1137
+ "logps/chosen": -1.7910137176513672,
1138
+ "logps/rejected": -62.980018615722656,
1139
+ "loss": 0.1831,
1140
+ "rewards/accuracies": 0.9500000476837158,
1141
+ "rewards/chosen": 16.982879638671875,
1142
+ "rewards/margins": 5.589078903198242,
1143
+ "rewards/rejected": 11.393800735473633,
1144
+ "step": 650
1145
+ },
1146
+ {
1147
+ "epoch": 0.5866666666666667,
1148
+ "grad_norm": 32.356956481933594,
1149
+ "learning_rate": 2.182663505585314e-06,
1150
+ "logits/chosen": -0.01711965538561344,
1151
+ "logits/rejected": -0.028038471937179565,
1152
+ "logps/chosen": -2.662904739379883,
1153
+ "logps/rejected": -63.326297760009766,
1154
+ "loss": 0.1678,
1155
+ "rewards/accuracies": 0.9624999761581421,
1156
+ "rewards/chosen": 16.997777938842773,
1157
+ "rewards/margins": 5.741157531738281,
1158
+ "rewards/rejected": 11.256620407104492,
1159
+ "step": 660
1160
+ },
1161
+ {
1162
+ "epoch": 0.5866666666666667,
1163
+ "eval_logits/chosen": -0.016018809750676155,
1164
+ "eval_logits/rejected": -0.027020033448934555,
1165
+ "eval_logps/chosen": -3.00822114944458,
1166
+ "eval_logps/rejected": -57.951629638671875,
1167
+ "eval_loss": 0.2050127536058426,
1168
+ "eval_rewards/accuracies": 0.9450000524520874,
1169
+ "eval_rewards/chosen": 17.202072143554688,
1170
+ "eval_rewards/margins": 5.636512756347656,
1171
+ "eval_rewards/rejected": 11.565557479858398,
1172
+ "eval_runtime": 361.5073,
1173
+ "eval_samples_per_second": 2.766,
1174
+ "eval_steps_per_second": 0.346,
1175
+ "step": 660
1176
+ },
1177
+ {
1178
+ "epoch": 0.5955555555555555,
1179
+ "grad_norm": 86.53874969482422,
1180
+ "learning_rate": 2.1058480187138863e-06,
1181
+ "logits/chosen": -0.012352555990219116,
1182
+ "logits/rejected": -0.023966707289218903,
1183
+ "logps/chosen": -2.129209041595459,
1184
+ "logps/rejected": -62.1393928527832,
1185
+ "loss": 0.1025,
1186
+ "rewards/accuracies": 0.9624999761581421,
1187
+ "rewards/chosen": 17.069538116455078,
1188
+ "rewards/margins": 5.678930759429932,
1189
+ "rewards/rejected": 11.390605926513672,
1190
+ "step": 670
1191
+ },
1192
+ {
1193
+ "epoch": 0.6044444444444445,
1194
+ "grad_norm": 2.892672300338745,
1195
+ "learning_rate": 2.0294123428584985e-06,
1196
+ "logits/chosen": -0.011562807485461235,
1197
+ "logits/rejected": -0.020859256386756897,
1198
+ "logps/chosen": -3.2486608028411865,
1199
+ "logps/rejected": -59.56721496582031,
1200
+ "loss": 0.1961,
1201
+ "rewards/accuracies": 0.9500000476837158,
1202
+ "rewards/chosen": 17.051376342773438,
1203
+ "rewards/margins": 5.515361785888672,
1204
+ "rewards/rejected": 11.536016464233398,
1205
+ "step": 680
1206
+ },
1207
+ {
1208
+ "epoch": 0.6133333333333333,
1209
+ "grad_norm": 30.26588249206543,
1210
+ "learning_rate": 1.953430132632311e-06,
1211
+ "logits/chosen": -0.011488726362586021,
1212
+ "logits/rejected": -0.021527227014303207,
1213
+ "logps/chosen": -1.794357180595398,
1214
+ "logps/rejected": -60.649505615234375,
1215
+ "loss": 0.2821,
1216
+ "rewards/accuracies": 0.9375,
1217
+ "rewards/chosen": 17.114843368530273,
1218
+ "rewards/margins": 5.612217903137207,
1219
+ "rewards/rejected": 11.50262451171875,
1220
+ "step": 690
1221
+ },
1222
+ {
1223
+ "epoch": 0.6222222222222222,
1224
+ "grad_norm": 0.030314341187477112,
1225
+ "learning_rate": 1.8779746056819104e-06,
1226
+ "logits/chosen": -0.014436552301049232,
1227
+ "logits/rejected": -0.026044374331831932,
1228
+ "logps/chosen": -3.1617178916931152,
1229
+ "logps/rejected": -60.94548797607422,
1230
+ "loss": 0.3141,
1231
+ "rewards/accuracies": 0.9375,
1232
+ "rewards/chosen": 17.08698272705078,
1233
+ "rewards/margins": 5.719264984130859,
1234
+ "rewards/rejected": 11.367716789245605,
1235
+ "step": 700
1236
+ },
1237
+ {
1238
+ "epoch": 0.6311111111111111,
1239
+ "grad_norm": 0.06711317598819733,
1240
+ "learning_rate": 1.8031184721336364e-06,
1241
+ "logits/chosen": -0.017312290146946907,
1242
+ "logits/rejected": -0.026554957032203674,
1243
+ "logps/chosen": -5.120705604553223,
1244
+ "logps/rejected": -54.33483123779297,
1245
+ "loss": 0.3827,
1246
+ "rewards/accuracies": 0.887499988079071,
1247
+ "rewards/chosen": 17.1247501373291,
1248
+ "rewards/margins": 5.327882766723633,
1249
+ "rewards/rejected": 11.796867370605469,
1250
+ "step": 710
1251
+ },
1252
+ {
1253
+ "epoch": 0.64,
1254
+ "grad_norm": 60.36637496948242,
1255
+ "learning_rate": 1.7289338645289711e-06,
1256
+ "logits/chosen": -0.01987219974398613,
1257
+ "logits/rejected": -0.02766304835677147,
1258
+ "logps/chosen": -2.1757986545562744,
1259
+ "logps/rejected": -54.02531433105469,
1260
+ "loss": 0.2272,
1261
+ "rewards/accuracies": 0.949999988079071,
1262
+ "rewards/chosen": 17.239776611328125,
1263
+ "rewards/margins": 5.250313758850098,
1264
+ "rewards/rejected": 11.989462852478027,
1265
+ "step": 720
1266
+ },
1267
+ {
1268
+ "epoch": 0.64,
1269
+ "eval_logits/chosen": -0.022905193269252777,
1270
+ "eval_logits/rejected": -0.0321992002427578,
1271
+ "eval_logps/chosen": -1.100506067276001,
1272
+ "eval_logps/rejected": -55.91169738769531,
1273
+ "eval_loss": 0.14018221199512482,
1274
+ "eval_rewards/accuracies": 0.9520000219345093,
1275
+ "eval_rewards/chosen": 17.392841339111328,
1276
+ "eval_rewards/margins": 5.623291492462158,
1277
+ "eval_rewards/rejected": 11.769551277160645,
1278
+ "eval_runtime": 361.416,
1279
+ "eval_samples_per_second": 2.767,
1280
+ "eval_steps_per_second": 0.346,
1281
+ "step": 720
1282
+ },
1283
+ {
1284
+ "epoch": 0.6488888888888888,
1285
+ "grad_norm": 0.029294608160853386,
1286
+ "learning_rate": 1.6554922683164875e-06,
1287
+ "logits/chosen": -0.016416028141975403,
1288
+ "logits/rejected": -0.025527067482471466,
1289
+ "logps/chosen": -0.906692385673523,
1290
+ "logps/rejected": -59.142173767089844,
1291
+ "loss": 0.1171,
1292
+ "rewards/accuracies": 0.9624999761581421,
1293
+ "rewards/chosen": 17.282169342041016,
1294
+ "rewards/margins": 5.6978583335876465,
1295
+ "rewards/rejected": 11.584310531616211,
1296
+ "step": 730
1297
+ },
1298
+ {
1299
+ "epoch": 0.6577777777777778,
1300
+ "grad_norm": 33.89070510864258,
1301
+ "learning_rate": 1.5828644529673592e-06,
1302
+ "logits/chosen": -0.016044551506638527,
1303
+ "logits/rejected": -0.025811903178691864,
1304
+ "logps/chosen": -1.1167538166046143,
1305
+ "logps/rejected": -59.84492492675781,
1306
+ "loss": 0.2001,
1307
+ "rewards/accuracies": 0.9624999761581421,
1308
+ "rewards/chosen": 17.266983032226562,
1309
+ "rewards/margins": 5.749438285827637,
1310
+ "rewards/rejected": 11.51754379272461,
1311
+ "step": 740
1312
+ },
1313
+ {
1314
+ "epoch": 0.6666666666666666,
1315
+ "grad_norm": 0.53726726770401,
1316
+ "learning_rate": 1.5111204037807844e-06,
1317
+ "logits/chosen": -0.012529855594038963,
1318
+ "logits/rejected": -0.023765765130519867,
1319
+ "logps/chosen": -0.3461765646934509,
1320
+ "logps/rejected": -55.841102600097656,
1321
+ "loss": 0.1692,
1322
+ "rewards/accuracies": 0.9375,
1323
+ "rewards/chosen": 17.61865997314453,
1324
+ "rewards/margins": 6.019055366516113,
1325
+ "rewards/rejected": 11.599604606628418,
1326
+ "step": 750
1327
+ },
1328
+ {
1329
+ "epoch": 0.6755555555555556,
1330
+ "grad_norm": 0.7173987627029419,
1331
+ "learning_rate": 1.4403292544450625e-06,
1332
+ "logits/chosen": -0.013380522839725018,
1333
+ "logits/rejected": -0.022107835859060287,
1334
+ "logps/chosen": -3.2759666442871094,
1335
+ "logps/rejected": -53.836822509765625,
1336
+ "loss": 0.2418,
1337
+ "rewards/accuracies": 0.949999988079071,
1338
+ "rewards/chosen": 17.383333206176758,
1339
+ "rewards/margins": 5.590358734130859,
1340
+ "rewards/rejected": 11.792974472045898,
1341
+ "step": 760
1342
+ },
1343
+ {
1344
+ "epoch": 0.6844444444444444,
1345
+ "grad_norm": 2.744900941848755,
1346
+ "learning_rate": 1.3705592204192853e-06,
1347
+ "logits/chosen": -0.011318420059978962,
1348
+ "logits/rejected": -0.021840626373887062,
1349
+ "logps/chosen": -2.592001438140869,
1350
+ "logps/rejected": -57.06926345825195,
1351
+ "loss": 0.1749,
1352
+ "rewards/accuracies": 0.9500000476837158,
1353
+ "rewards/chosen": 17.286128997802734,
1354
+ "rewards/margins": 5.662715911865234,
1355
+ "rewards/rejected": 11.623414993286133,
1356
+ "step": 770
1357
+ },
1358
+ {
1359
+ "epoch": 0.6933333333333334,
1360
+ "grad_norm": 70.76551818847656,
1361
+ "learning_rate": 1.301877533199859e-06,
1362
+ "logits/chosen": -0.013944407925009727,
1363
+ "logits/rejected": -0.025667501613497734,
1364
+ "logps/chosen": -2.1070234775543213,
1365
+ "logps/rejected": -57.72039794921875,
1366
+ "loss": 0.1915,
1367
+ "rewards/accuracies": 0.925000011920929,
1368
+ "rewards/chosen": 17.28545379638672,
1369
+ "rewards/margins": 5.708344459533691,
1370
+ "rewards/rejected": 11.577108383178711,
1371
+ "step": 780
1372
+ },
1373
+ {
1374
+ "epoch": 0.6933333333333334,
1375
+ "eval_logits/chosen": -0.01664295792579651,
1376
+ "eval_logits/rejected": -0.026626665145158768,
1377
+ "eval_logps/chosen": -1.082255244255066,
1378
+ "eval_logps/rejected": -55.95073699951172,
1379
+ "eval_loss": 0.24412688612937927,
1380
+ "eval_rewards/accuracies": 0.9320000410079956,
1381
+ "eval_rewards/chosen": 17.394668579101562,
1382
+ "eval_rewards/margins": 5.629020690917969,
1383
+ "eval_rewards/rejected": 11.765647888183594,
1384
+ "eval_runtime": 361.4602,
1385
+ "eval_samples_per_second": 2.767,
1386
+ "eval_steps_per_second": 0.346,
1387
+ "step": 780
1388
+ },
1389
+ {
1390
+ "epoch": 0.7022222222222222,
1391
+ "grad_norm": 0.41359376907348633,
1392
+ "learning_rate": 1.2343503755351729e-06,
1393
+ "logits/chosen": -0.012772129848599434,
1394
+ "logits/rejected": -0.023186586797237396,
1395
+ "logps/chosen": -0.8030359148979187,
1396
+ "logps/rejected": -57.84947967529297,
1397
+ "loss": 0.2143,
1398
+ "rewards/accuracies": 0.9375,
1399
+ "rewards/chosen": 17.32242202758789,
1400
+ "rewards/margins": 5.645486831665039,
1401
+ "rewards/rejected": 11.676933288574219,
1402
+ "step": 790
1403
+ },
1404
+ {
1405
+ "epoch": 0.7111111111111111,
1406
+ "grad_norm": 0.41748157143592834,
1407
+ "learning_rate": 1.168042817650881e-06,
1408
+ "logits/chosen": -0.012502101249992847,
1409
+ "logits/rejected": -0.023272844031453133,
1410
+ "logps/chosen": -1.0377256870269775,
1411
+ "logps/rejected": -57.05615997314453,
1412
+ "loss": 0.0792,
1413
+ "rewards/accuracies": 0.9750000238418579,
1414
+ "rewards/chosen": 17.60501480102539,
1415
+ "rewards/margins": 6.159370422363281,
1416
+ "rewards/rejected": 11.445646286010742,
1417
+ "step": 800
1418
+ },
1419
+ {
1420
+ "epoch": 0.72,
1421
+ "grad_norm": 0.039210401475429535,
1422
+ "learning_rate": 1.1030187545472012e-06,
1423
+ "logits/chosen": -0.008177272044122219,
1424
+ "logits/rejected": -0.01832464337348938,
1425
+ "logps/chosen": -3.8701748847961426,
1426
+ "logps/rejected": -57.661476135253906,
1427
+ "loss": 0.3177,
1428
+ "rewards/accuracies": 0.925000011920929,
1429
+ "rewards/chosen": 17.14336395263672,
1430
+ "rewards/margins": 5.583393573760986,
1431
+ "rewards/rejected": 11.55997085571289,
1432
+ "step": 810
1433
+ },
1434
+ {
1435
+ "epoch": 0.7288888888888889,
1436
+ "grad_norm": 1.6096951961517334,
1437
+ "learning_rate": 1.0393408444287048e-06,
1438
+ "logits/chosen": -0.006832236424088478,
1439
+ "logits/rejected": -0.01682097464799881,
1440
+ "logps/chosen": -2.1741790771484375,
1441
+ "logps/rejected": -57.273414611816406,
1442
+ "loss": 0.2128,
1443
+ "rewards/accuracies": 0.949999988079071,
1444
+ "rewards/chosen": 17.426712036132812,
1445
+ "rewards/margins": 5.911205291748047,
1446
+ "rewards/rejected": 11.515506744384766,
1447
+ "step": 820
1448
+ },
1449
+ {
1450
+ "epoch": 0.7377777777777778,
1451
+ "grad_norm": 3.7820959091186523,
1452
+ "learning_rate": 9.770704483258782e-07,
1453
+ "logits/chosen": -0.009998206980526447,
1454
+ "logits/rejected": -0.0204261876642704,
1455
+ "logps/chosen": -1.9802953004837036,
1456
+ "logps/rejected": -56.901512145996094,
1457
+ "loss": 0.1296,
1458
+ "rewards/accuracies": 0.9750000238418579,
1459
+ "rewards/chosen": 17.56071662902832,
1460
+ "rewards/margins": 6.169583320617676,
1461
+ "rewards/rejected": 11.391134262084961,
1462
+ "step": 830
1463
+ },
1464
+ {
1465
+ "epoch": 0.7466666666666667,
1466
+ "grad_norm": 1.126626968383789,
1467
+ "learning_rate": 9.162675709666865e-07,
1468
+ "logits/chosen": -0.00826224498450756,
1469
+ "logits/rejected": -0.018977787345647812,
1470
+ "logps/chosen": -1.4256607294082642,
1471
+ "logps/rejected": -61.65986633300781,
1472
+ "loss": 0.0635,
1473
+ "rewards/accuracies": 0.987500011920929,
1474
+ "rewards/chosen": 17.324975967407227,
1475
+ "rewards/margins": 6.09440803527832,
1476
+ "rewards/rejected": 11.230567932128906,
1477
+ "step": 840
1478
+ },
1479
+ {
1480
+ "epoch": 0.7466666666666667,
1481
+ "eval_logits/chosen": -0.01111944392323494,
1482
+ "eval_logits/rejected": -0.021697774529457092,
1483
+ "eval_logps/chosen": -1.2168633937835693,
1484
+ "eval_logps/rejected": -58.2642822265625,
1485
+ "eval_loss": 0.1689341962337494,
1486
+ "eval_rewards/accuracies": 0.9450000524520874,
1487
+ "eval_rewards/chosen": 17.381206512451172,
1488
+ "eval_rewards/margins": 5.846914291381836,
1489
+ "eval_rewards/rejected": 11.534292221069336,
1490
+ "eval_runtime": 361.6192,
1491
+ "eval_samples_per_second": 2.765,
1492
+ "eval_steps_per_second": 0.346,
1493
+ "step": 840
1494
+ },
1495
+ {
1496
+ "epoch": 0.7555555555555555,
1497
+ "grad_norm": 5.250723838806152,
1498
+ "learning_rate": 8.569908029550686e-07,
1499
+ "logits/chosen": -0.006854387000203133,
1500
+ "logits/rejected": -0.018336206674575806,
1501
+ "logps/chosen": -0.6238930821418762,
1502
+ "logps/rejected": -60.925689697265625,
1503
+ "loss": 0.1157,
1504
+ "rewards/accuracies": 0.9750000238418579,
1505
+ "rewards/chosen": 17.443281173706055,
1506
+ "rewards/margins": 6.164813995361328,
1507
+ "rewards/rejected": 11.278467178344727,
1508
+ "step": 850
1509
+ },
1510
+ {
1511
+ "epoch": 0.7644444444444445,
1512
+ "grad_norm": 3.1401162147521973,
1513
+ "learning_rate": 7.992972643121227e-07,
1514
+ "logits/chosen": -0.0037835021503269672,
1515
+ "logits/rejected": -0.013135241344571114,
1516
+ "logps/chosen": -0.8492221832275391,
1517
+ "logps/rejected": -55.516075134277344,
1518
+ "loss": 0.2252,
1519
+ "rewards/accuracies": 0.9375,
1520
+ "rewards/chosen": 17.488588333129883,
1521
+ "rewards/margins": 5.736725807189941,
1522
+ "rewards/rejected": 11.751862525939941,
1523
+ "step": 860
1524
+ },
1525
+ {
1526
+ "epoch": 0.7733333333333333,
1527
+ "grad_norm": 55.528812408447266,
1528
+ "learning_rate": 7.432425494343509e-07,
1529
+ "logits/chosen": -0.0033687639515846968,
1530
+ "logits/rejected": -0.013152632862329483,
1531
+ "logps/chosen": -1.3188884258270264,
1532
+ "logps/rejected": -57.9510498046875,
1533
+ "loss": 0.1398,
1534
+ "rewards/accuracies": 0.9624999761581421,
1535
+ "rewards/chosen": 17.412578582763672,
1536
+ "rewards/margins": 5.868515968322754,
1537
+ "rewards/rejected": 11.544061660766602,
1538
+ "step": 870
1539
+ },
1540
+ {
1541
+ "epoch": 0.7822222222222223,
1542
+ "grad_norm": 0.039824869483709335,
1543
+ "learning_rate": 6.888806735220396e-07,
1544
+ "logits/chosen": -0.0010406378423795104,
1545
+ "logits/rejected": -0.012095071375370026,
1546
+ "logps/chosen": -2.0619027614593506,
1547
+ "logps/rejected": -59.65806579589844,
1548
+ "loss": 0.2966,
1549
+ "rewards/accuracies": 0.9375,
1550
+ "rewards/chosen": 17.192535400390625,
1551
+ "rewards/margins": 5.7006731033325195,
1552
+ "rewards/rejected": 11.491861343383789,
1553
+ "step": 880
1554
+ },
1555
+ {
1556
+ "epoch": 0.7911111111111111,
1557
+ "grad_norm": 0.7536466717720032,
1558
+ "learning_rate": 6.362640205293583e-07,
1559
+ "logits/chosen": -0.0016857212176546454,
1560
+ "logits/rejected": -0.010936147533357143,
1561
+ "logps/chosen": -2.1478958129882812,
1562
+ "logps/rejected": -58.20386505126953,
1563
+ "loss": 0.1641,
1564
+ "rewards/accuracies": 0.9375,
1565
+ "rewards/chosen": 17.243247985839844,
1566
+ "rewards/margins": 5.678771018981934,
1567
+ "rewards/rejected": 11.564477920532227,
1568
+ "step": 890
1569
+ },
1570
+ {
1571
+ "epoch": 0.8,
1572
+ "grad_norm": 0.3857377767562866,
1573
+ "learning_rate": 5.854432926863684e-07,
1574
+ "logits/chosen": 0.00038508616853505373,
1575
+ "logits/rejected": -0.011034643277525902,
1576
+ "logps/chosen": -1.606274127960205,
1577
+ "logps/rejected": -62.58662414550781,
1578
+ "loss": 0.1703,
1579
+ "rewards/accuracies": 0.9624999761581421,
1580
+ "rewards/chosen": 17.211261749267578,
1581
+ "rewards/margins": 5.966868877410889,
1582
+ "rewards/rejected": 11.244392395019531,
1583
+ "step": 900
1584
+ },
1585
+ {
1586
+ "epoch": 0.8,
1587
+ "eval_logits/chosen": 0.00021816430671606213,
1588
+ "eval_logits/rejected": -0.010477552190423012,
1589
+ "eval_logps/chosen": -1.7576563358306885,
1590
+ "eval_logps/rejected": -59.79063415527344,
1591
+ "eval_loss": 0.13995186984539032,
1592
+ "eval_rewards/accuracies": 0.9610000848770142,
1593
+ "eval_rewards/chosen": 17.327129364013672,
1594
+ "eval_rewards/margins": 5.945469856262207,
1595
+ "eval_rewards/rejected": 11.381658554077148,
1596
+ "eval_runtime": 361.4592,
1597
+ "eval_samples_per_second": 2.767,
1598
+ "eval_steps_per_second": 0.346,
1599
+ "step": 900
1600
+ },
1601
+ {
1602
+ "epoch": 0.8088888888888889,
1603
+ "grad_norm": 0.13143697381019592,
1604
+ "learning_rate": 5.364674616415547e-07,
1605
+ "logits/chosen": 0.0005570838693529367,
1606
+ "logits/rejected": -0.011198626831173897,
1607
+ "logps/chosen": -0.12791283428668976,
1608
+ "logps/rejected": -65.7052993774414,
1609
+ "loss": 0.0507,
1610
+ "rewards/accuracies": 0.9750000238418579,
1611
+ "rewards/chosen": 17.224443435668945,
1612
+ "rewards/margins": 6.163690567016602,
1613
+ "rewards/rejected": 11.060752868652344,
1614
+ "step": 910
1615
+ },
1616
+ {
1617
+ "epoch": 0.8177777777777778,
1618
+ "grad_norm": 102.9654541015625,
1619
+ "learning_rate": 4.893837212719859e-07,
1620
+ "logits/chosen": -0.0008557128603570163,
1621
+ "logits/rejected": -0.01163212489336729,
1622
+ "logps/chosen": -1.3292646408081055,
1623
+ "logps/rejected": -61.644893646240234,
1624
+ "loss": 0.0881,
1625
+ "rewards/accuracies": 0.9500000476837158,
1626
+ "rewards/chosen": 17.276538848876953,
1627
+ "rewards/margins": 5.96080207824707,
1628
+ "rewards/rejected": 11.3157377243042,
1629
+ "step": 920
1630
+ },
1631
+ {
1632
+ "epoch": 0.8266666666666667,
1633
+ "grad_norm": 146.04498291015625,
1634
+ "learning_rate": 4.442374422065493e-07,
1635
+ "logits/chosen": 0.002922601066529751,
1636
+ "logits/rejected": -0.007130052894353867,
1637
+ "logps/chosen": -1.6069023609161377,
1638
+ "logps/rejected": -61.52588653564453,
1639
+ "loss": 0.1373,
1640
+ "rewards/accuracies": 0.949999988079071,
1641
+ "rewards/chosen": 17.184974670410156,
1642
+ "rewards/margins": 5.7995734214782715,
1643
+ "rewards/rejected": 11.38540267944336,
1644
+ "step": 930
1645
+ },
1646
+ {
1647
+ "epoch": 0.8355555555555556,
1648
+ "grad_norm": 23.539485931396484,
1649
+ "learning_rate": 4.0107212810610974e-07,
1650
+ "logits/chosen": 0.0018056132830679417,
1651
+ "logits/rejected": -0.007847340777516365,
1652
+ "logps/chosen": -3.637047290802002,
1653
+ "logps/rejected": -61.21245574951172,
1654
+ "loss": 0.2763,
1655
+ "rewards/accuracies": 0.925000011920929,
1656
+ "rewards/chosen": 16.960758209228516,
1657
+ "rewards/margins": 5.551811695098877,
1658
+ "rewards/rejected": 11.40894603729248,
1659
+ "step": 940
1660
+ },
1661
+ {
1662
+ "epoch": 0.8444444444444444,
1663
+ "grad_norm": 31.52926254272461,
1664
+ "learning_rate": 3.599293737426932e-07,
1665
+ "logits/chosen": 0.0032081177923828363,
1666
+ "logits/rejected": -0.007756482809782028,
1667
+ "logps/chosen": -1.172515869140625,
1668
+ "logps/rejected": -66.5853271484375,
1669
+ "loss": 0.1758,
1670
+ "rewards/accuracies": 0.949999988079071,
1671
+ "rewards/chosen": 16.992876052856445,
1672
+ "rewards/margins": 5.886469841003418,
1673
+ "rewards/rejected": 11.106407165527344,
1674
+ "step": 950
1675
+ },
1676
+ {
1677
+ "epoch": 0.8533333333333334,
1678
+ "grad_norm": 0.07474468648433685,
1679
+ "learning_rate": 3.208488249181216e-07,
1680
+ "logits/chosen": 0.0022508346009999514,
1681
+ "logits/rejected": -0.009156409651041031,
1682
+ "logps/chosen": -0.7930470108985901,
1683
+ "logps/rejected": -60.068790435791016,
1684
+ "loss": 0.1138,
1685
+ "rewards/accuracies": 0.9624999761581421,
1686
+ "rewards/chosen": 17.401771545410156,
1687
+ "rewards/margins": 6.0026960372924805,
1688
+ "rewards/rejected": 11.399076461791992,
1689
+ "step": 960
1690
+ },
1691
+ {
1692
+ "epoch": 0.8533333333333334,
1693
+ "eval_logits/chosen": 0.001469604205340147,
1694
+ "eval_logits/rejected": -0.009397665038704872,
1695
+ "eval_logps/chosen": -1.8795456886291504,
1696
+ "eval_logps/rejected": -60.17564010620117,
1697
+ "eval_loss": 0.1441129744052887,
1698
+ "eval_rewards/accuracies": 0.9630000591278076,
1699
+ "eval_rewards/chosen": 17.314937591552734,
1700
+ "eval_rewards/margins": 5.9717817306518555,
1701
+ "eval_rewards/rejected": 11.343156814575195,
1702
+ "eval_runtime": 361.5344,
1703
+ "eval_samples_per_second": 2.766,
1704
+ "eval_steps_per_second": 0.346,
1705
+ "step": 960
1706
+ },
1707
+ {
1708
+ "epoch": 0.8622222222222222,
1709
+ "grad_norm": 68.90747833251953,
1710
+ "learning_rate": 2.838681402606952e-07,
1711
+ "logits/chosen": 0.004552370868623257,
1712
+ "logits/rejected": -0.005488495342433453,
1713
+ "logps/chosen": -3.7298974990844727,
1714
+ "logps/rejected": -64.72488403320312,
1715
+ "loss": 0.2425,
1716
+ "rewards/accuracies": 0.925000011920929,
1717
+ "rewards/chosen": 16.832754135131836,
1718
+ "rewards/margins": 5.644216537475586,
1719
+ "rewards/rejected": 11.18853759765625,
1720
+ "step": 970
1721
+ },
1722
+ {
1723
+ "epoch": 0.8711111111111111,
1724
+ "grad_norm": 0.028206102550029755,
1725
+ "learning_rate": 2.490229549367443e-07,
1726
+ "logits/chosen": 0.0025807656347751617,
1727
+ "logits/rejected": -0.008657123893499374,
1728
+ "logps/chosen": -1.1825838088989258,
1729
+ "logps/rejected": -63.57493591308594,
1730
+ "loss": 0.0609,
1731
+ "rewards/accuracies": 0.9750000238418579,
1732
+ "rewards/chosen": 17.227130889892578,
1733
+ "rewards/margins": 6.053717136383057,
1734
+ "rewards/rejected": 11.173412322998047,
1735
+ "step": 980
1736
+ },
1737
+ {
1738
+ "epoch": 0.88,
1739
+ "grad_norm": 0.39742231369018555,
1740
+ "learning_rate": 2.1634684631203412e-07,
1741
+ "logits/chosen": 0.0048486413434147835,
1742
+ "logits/rejected": -0.006056814920157194,
1743
+ "logps/chosen": -2.919680595397949,
1744
+ "logps/rejected": -57.8403205871582,
1745
+ "loss": 0.3464,
1746
+ "rewards/accuracies": 0.8999999761581421,
1747
+ "rewards/chosen": 17.175275802612305,
1748
+ "rewards/margins": 5.559727668762207,
1749
+ "rewards/rejected": 11.615548133850098,
1750
+ "step": 990
1751
+ },
1752
+ {
1753
+ "epoch": 0.8888888888888888,
1754
+ "grad_norm": 30.22509002685547,
1755
+ "learning_rate": 1.8587130159608196e-07,
1756
+ "logits/chosen": 0.0050649940967559814,
1757
+ "logits/rejected": -0.0071399761363863945,
1758
+ "logps/chosen": -0.16131475567817688,
1759
+ "logps/rejected": -66.25190734863281,
1760
+ "loss": 0.0063,
1761
+ "rewards/accuracies": 1.0,
1762
+ "rewards/chosen": 17.313472747802734,
1763
+ "rewards/margins": 6.412895679473877,
1764
+ "rewards/rejected": 10.9005765914917,
1765
+ "step": 1000
1766
+ },
1767
+ {
1768
+ "epoch": 0.8977777777777778,
1769
+ "grad_norm": 53.02751922607422,
1770
+ "learning_rate": 1.5762568750059604e-07,
1771
+ "logits/chosen": 0.005312003195285797,
1772
+ "logits/rejected": -0.0038596936501562595,
1773
+ "logps/chosen": -4.00323486328125,
1774
+ "logps/rejected": -57.656890869140625,
1775
+ "loss": 0.2741,
1776
+ "rewards/accuracies": 0.925000011920929,
1777
+ "rewards/chosen": 17.167781829833984,
1778
+ "rewards/margins": 5.632095813751221,
1779
+ "rewards/rejected": 11.535685539245605,
1780
+ "step": 1010
1781
+ },
1782
+ {
1783
+ "epoch": 0.9066666666666666,
1784
+ "grad_norm": 119.56330108642578,
1785
+ "learning_rate": 1.316372219412454e-07,
1786
+ "logits/chosen": 0.004333779215812683,
1787
+ "logits/rejected": -0.007937717251479626,
1788
+ "logps/chosen": -0.10221245884895325,
1789
+ "logps/rejected": -67.09977722167969,
1790
+ "loss": 0.0513,
1791
+ "rewards/accuracies": 0.9750000238418579,
1792
+ "rewards/chosen": 17.151248931884766,
1793
+ "rewards/margins": 6.162137985229492,
1794
+ "rewards/rejected": 10.98911190032959,
1795
+ "step": 1020
1796
+ },
1797
+ {
1798
+ "epoch": 0.9066666666666666,
1799
+ "eval_logits/chosen": 0.004528110846877098,
1800
+ "eval_logits/rejected": -0.006501312367618084,
1801
+ "eval_logps/chosen": -1.817779302597046,
1802
+ "eval_logps/rejected": -60.34454345703125,
1803
+ "eval_loss": 0.14118175208568573,
1804
+ "eval_rewards/accuracies": 0.9610000848770142,
1805
+ "eval_rewards/chosen": 17.321117401123047,
1806
+ "eval_rewards/margins": 5.994848251342773,
1807
+ "eval_rewards/rejected": 11.32626724243164,
1808
+ "eval_runtime": 361.545,
1809
+ "eval_samples_per_second": 2.766,
1810
+ "eval_steps_per_second": 0.346,
1811
+ "step": 1020
1812
+ },
1813
+ {
1814
+ "epoch": 0.9155555555555556,
1815
+ "grad_norm": 0.9030271768569946,
1816
+ "learning_rate": 1.0793094781005792e-07,
1817
+ "logits/chosen": 0.0036234352737665176,
1818
+ "logits/rejected": -0.008390933275222778,
1819
+ "logps/chosen": -0.09899584949016571,
1820
+ "logps/rejected": -60.80555725097656,
1821
+ "loss": 0.0349,
1822
+ "rewards/accuracies": 0.987500011920929,
1823
+ "rewards/chosen": 17.619266510009766,
1824
+ "rewards/margins": 6.4814043045043945,
1825
+ "rewards/rejected": 11.137863159179688,
1826
+ "step": 1030
1827
+ },
1828
+ {
1829
+ "epoch": 0.9244444444444444,
1830
+ "grad_norm": 157.070068359375,
1831
+ "learning_rate": 8.652970884369255e-08,
1832
+ "logits/chosen": 0.005321115255355835,
1833
+ "logits/rejected": -0.004976513795554638,
1834
+ "logps/chosen": -1.3535833358764648,
1835
+ "logps/rejected": -61.2861328125,
1836
+ "loss": 0.1927,
1837
+ "rewards/accuracies": 0.9500000476837158,
1838
+ "rewards/chosen": 17.209800720214844,
1839
+ "rewards/margins": 5.795952796936035,
1840
+ "rewards/rejected": 11.413846015930176,
1841
+ "step": 1040
1842
+ },
1843
+ {
1844
+ "epoch": 0.9333333333333333,
1845
+ "grad_norm": 24.539953231811523,
1846
+ "learning_rate": 6.745412761086007e-08,
1847
+ "logits/chosen": 0.005845514126121998,
1848
+ "logits/rejected": -0.003921338357031345,
1849
+ "logps/chosen": -1.7223193645477295,
1850
+ "logps/rejected": -58.172142028808594,
1851
+ "loss": 0.1317,
1852
+ "rewards/accuracies": 0.9750000238418579,
1853
+ "rewards/chosen": 17.360435485839844,
1854
+ "rewards/margins": 5.838218688964844,
1855
+ "rewards/rejected": 11.522216796875,
1856
+ "step": 1050
1857
+ },
1858
+ {
1859
+ "epoch": 0.9422222222222222,
1860
+ "grad_norm": 0.061458222568035126,
1861
+ "learning_rate": 5.0722585640090305e-08,
1862
+ "logits/chosen": 0.003759522922337055,
1863
+ "logits/rejected": -0.0068417866714298725,
1864
+ "logps/chosen": -1.351855754852295,
1865
+ "logps/rejected": -59.56566619873047,
1866
+ "loss": 0.2279,
1867
+ "rewards/accuracies": 0.9375,
1868
+ "rewards/chosen": 17.245838165283203,
1869
+ "rewards/margins": 5.7038679122924805,
1870
+ "rewards/rejected": 11.541970252990723,
1871
+ "step": 1060
1872
+ },
1873
+ {
1874
+ "epoch": 0.9511111111111111,
1875
+ "grad_norm": 101.87332916259766,
1876
+ "learning_rate": 3.635120570700784e-08,
1877
+ "logits/chosen": 0.007607857696712017,
1878
+ "logits/rejected": -0.0035320711322128773,
1879
+ "logps/chosen": -1.4004669189453125,
1880
+ "logps/rejected": -63.46752166748047,
1881
+ "loss": 0.1021,
1882
+ "rewards/accuracies": 0.9624999761581421,
1883
+ "rewards/chosen": 17.240036010742188,
1884
+ "rewards/margins": 6.078882694244385,
1885
+ "rewards/rejected": 11.161155700683594,
1886
+ "step": 1070
1887
+ },
1888
+ {
1889
+ "epoch": 0.96,
1890
+ "grad_norm": 1.9229660034179688,
1891
+ "learning_rate": 2.4353836298169343e-08,
1892
+ "logits/chosen": 0.006209026090800762,
1893
+ "logits/rejected": -0.0033816141076385975,
1894
+ "logps/chosen": -2.0281822681427,
1895
+ "logps/rejected": -61.460670471191406,
1896
+ "loss": 0.1189,
1897
+ "rewards/accuracies": 0.9750000238418579,
1898
+ "rewards/chosen": 17.19972801208496,
1899
+ "rewards/margins": 5.881450653076172,
1900
+ "rewards/rejected": 11.318277359008789,
1901
+ "step": 1080
1902
+ },
1903
+ {
1904
+ "epoch": 0.96,
1905
+ "eval_logits/chosen": 0.00737445754930377,
1906
+ "eval_logits/rejected": -0.0035832570865750313,
1907
+ "eval_logps/chosen": -2.1419789791107178,
1908
+ "eval_logps/rejected": -60.60612106323242,
1909
+ "eval_loss": 0.15081512928009033,
1910
+ "eval_rewards/accuracies": 0.9610000848770142,
1911
+ "eval_rewards/chosen": 17.288694381713867,
1912
+ "eval_rewards/margins": 5.98858642578125,
1913
+ "eval_rewards/rejected": 11.300108909606934,
1914
+ "eval_runtime": 361.4165,
1915
+ "eval_samples_per_second": 2.767,
1916
+ "eval_steps_per_second": 0.346,
1917
+ "step": 1080
1918
+ },
1919
+ {
1920
+ "epoch": 0.9688888888888889,
1921
+ "grad_norm": 0.11888863146305084,
1922
+ "learning_rate": 1.4742038266447046e-08,
1923
+ "logits/chosen": 0.006043245084583759,
1924
+ "logits/rejected": -0.004712546244263649,
1925
+ "logps/chosen": -1.4127472639083862,
1926
+ "logps/rejected": -64.81529998779297,
1927
+ "loss": 0.0728,
1928
+ "rewards/accuracies": 0.9750000238418579,
1929
+ "rewards/chosen": 17.181049346923828,
1930
+ "rewards/margins": 6.14100456237793,
1931
+ "rewards/rejected": 11.040044784545898,
1932
+ "step": 1090
1933
+ },
1934
+ {
1935
+ "epoch": 0.9777777777777777,
1936
+ "grad_norm": 2.001819372177124,
1937
+ "learning_rate": 7.525073690809737e-09,
1938
+ "logits/chosen": 0.0055408780463039875,
1939
+ "logits/rejected": -0.005598037503659725,
1940
+ "logps/chosen": -1.017110824584961,
1941
+ "logps/rejected": -60.28044891357422,
1942
+ "loss": 0.0357,
1943
+ "rewards/accuracies": 0.9750000238418579,
1944
+ "rewards/chosen": 17.4679012298584,
1945
+ "rewards/margins": 6.193048477172852,
1946
+ "rewards/rejected": 11.274852752685547,
1947
+ "step": 1100
1948
+ },
1949
+ {
1950
+ "epoch": 0.9866666666666667,
1951
+ "grad_norm": 0.027056939899921417,
1952
+ "learning_rate": 2.709896951238744e-09,
1953
+ "logits/chosen": 0.00636716466397047,
1954
+ "logits/rejected": -0.004360577557235956,
1955
+ "logps/chosen": -2.8028905391693115,
1956
+ "logps/rejected": -60.86518859863281,
1957
+ "loss": 0.1973,
1958
+ "rewards/accuracies": 0.949999988079071,
1959
+ "rewards/chosen": 17.210033416748047,
1960
+ "rewards/margins": 5.936794281005859,
1961
+ "rewards/rejected": 11.273238182067871,
1962
+ "step": 1110
1963
+ },
1964
+ {
1965
+ "epoch": 0.9955555555555555,
1966
+ "grad_norm": 0.15637506544589996,
1967
+ "learning_rate": 3.0114802737818415e-10,
1968
+ "logits/chosen": 0.0038183885626494884,
1969
+ "logits/rejected": -0.007296917960047722,
1970
+ "logps/chosen": -1.1477452516555786,
1971
+ "logps/rejected": -59.37739181518555,
1972
+ "loss": 0.0538,
1973
+ "rewards/accuracies": 0.987500011920929,
1974
+ "rewards/chosen": 17.553251266479492,
1975
+ "rewards/margins": 6.2790141105651855,
1976
+ "rewards/rejected": 11.274236679077148,
1977
+ "step": 1120
1978
+ },
1979
+ {
1980
+ "epoch": 1.0,
1981
+ "step": 1125,
1982
+ "total_flos": 1.4338459346927616e+18,
1983
+ "train_loss": 0.23597783709896936,
1984
+ "train_runtime": 13850.7044,
1985
+ "train_samples_per_second": 0.65,
1986
+ "train_steps_per_second": 0.081
1987
+ }
1988
+ ],
1989
+ "logging_steps": 10,
1990
+ "max_steps": 1125,
1991
+ "num_input_tokens_seen": 0,
1992
+ "num_train_epochs": 1,
1993
+ "save_steps": 500,
1994
+ "stateful_callbacks": {
1995
+ "TrainerControl": {
1996
+ "args": {
1997
+ "should_epoch_stop": false,
1998
+ "should_evaluate": false,
1999
+ "should_log": false,
2000
+ "should_save": true,
2001
+ "should_training_stop": true
2002
+ },
2003
+ "attributes": {}
2004
+ }
2005
+ },
2006
+ "total_flos": 1.4338459346927616e+18,
2007
+ "train_batch_size": 4,
2008
+ "trial_name": null,
2009
+ "trial_params": null
2010
+ }