Howard881010 committed on
Commit
db94873
1 Parent(s): 502cd69

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. README.md +103 -0
  3. adapter_config.json +34 -0
  4. adapter_model.safetensors +3 -0
  5. all_results.json +20 -0
  6. checkpoint-1000/README.md +202 -0
  7. checkpoint-1000/adapter_config.json +34 -0
  8. checkpoint-1000/adapter_model.safetensors +3 -0
  9. checkpoint-1000/optimizer.pt +3 -0
  10. checkpoint-1000/rng_state_0.pth +3 -0
  11. checkpoint-1000/rng_state_1.pth +3 -0
  12. checkpoint-1000/scheduler.pt +3 -0
  13. checkpoint-1000/special_tokens_map.json +24 -0
  14. checkpoint-1000/tokenizer.json +3 -0
  15. checkpoint-1000/tokenizer_config.json +0 -0
  16. checkpoint-1000/trainer_state.json +1789 -0
  17. checkpoint-1000/training_args.bin +3 -0
  18. checkpoint-1500/README.md +202 -0
  19. checkpoint-1500/adapter_config.json +34 -0
  20. checkpoint-1500/adapter_model.safetensors +3 -0
  21. checkpoint-1500/optimizer.pt +3 -0
  22. checkpoint-1500/rng_state_0.pth +3 -0
  23. checkpoint-1500/rng_state_1.pth +3 -0
  24. checkpoint-1500/scheduler.pt +3 -0
  25. checkpoint-1500/special_tokens_map.json +24 -0
  26. checkpoint-1500/tokenizer.json +3 -0
  27. checkpoint-1500/tokenizer_config.json +0 -0
  28. checkpoint-1500/trainer_state.json +2683 -0
  29. checkpoint-1500/training_args.bin +3 -0
  30. checkpoint-1800/README.md +202 -0
  31. checkpoint-1800/adapter_config.json +34 -0
  32. checkpoint-1800/adapter_model.safetensors +3 -0
  33. checkpoint-1800/optimizer.pt +3 -0
  34. checkpoint-1800/rng_state_0.pth +3 -0
  35. checkpoint-1800/rng_state_1.pth +3 -0
  36. checkpoint-1800/scheduler.pt +3 -0
  37. checkpoint-1800/special_tokens_map.json +24 -0
  38. checkpoint-1800/tokenizer.json +3 -0
  39. checkpoint-1800/tokenizer_config.json +0 -0
  40. checkpoint-1800/trainer_state.json +0 -0
  41. checkpoint-1800/training_args.bin +3 -0
  42. checkpoint-500/README.md +202 -0
  43. checkpoint-500/adapter_config.json +34 -0
  44. checkpoint-500/adapter_model.safetensors +3 -0
  45. checkpoint-500/optimizer.pt +3 -0
  46. checkpoint-500/rng_state_0.pth +3 -0
  47. checkpoint-500/rng_state_1.pth +3 -0
  48. checkpoint-500/scheduler.pt +3 -0
  49. checkpoint-500/special_tokens_map.json +24 -0
  50. checkpoint-500/tokenizer.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ license: other
5
+ tags:
6
+ - llama-factory
7
+ - lora
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: dpo
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # dpo
18
+
19
+ This model is a fine-tuned version of [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407) on the heat_transfer_dpo dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.1331
22
+ - Rewards/chosen: -4.9675
23
+ - Rewards/rejected: -13.7312
24
+ - Rewards/accuracies: 0.9480
25
+ - Rewards/margins: 8.7637
26
+ - Logps/chosen: -224.7040
27
+ - Logps/rejected: -310.9190
28
+ - Logits/chosen: -1.4384
29
+ - Logits/rejected: -1.4474
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 5e-06
49
+ - train_batch_size: 5
50
+ - eval_batch_size: 5
51
+ - seed: 42
52
+ - distributed_type: multi-GPU
53
+ - num_devices: 2
54
+ - total_train_batch_size: 10
55
+ - total_eval_batch_size: 10
56
+ - optimizer: OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
57
+ - lr_scheduler_type: cosine
58
+ - lr_scheduler_warmup_ratio: 0.1
59
+ - num_epochs: 2
60
+
61
+ ### Training results
62
+
63
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/chosen | Logps/rejected | Logits/chosen | Logits/rejected |
64
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:------------:|:--------------:|:-------------:|:---------------:|
65
+ | 0.6939 | 0.0667 | 60 | 0.6921 | -0.0219 | -0.0246 | 0.5190 | 0.0026 | -175.2482 | -173.8529 | -1.4010 | -1.4008 |
66
+ | 0.6871 | 0.1333 | 120 | 0.6830 | -0.0278 | -0.0494 | 0.6080 | 0.0216 | -175.3069 | -174.1010 | -1.4030 | -1.4029 |
67
+ | 0.6159 | 0.2 | 180 | 0.6382 | -0.5399 | -0.7225 | 0.5610 | 0.1826 | -180.4279 | -180.8317 | -1.4021 | -1.4025 |
68
+ | 0.368 | 0.2667 | 240 | 0.3849 | -1.3538 | -2.7449 | 0.8310 | 1.3911 | -188.5674 | -201.0563 | -1.3971 | -1.3996 |
69
+ | 0.3234 | 0.3333 | 300 | 0.3633 | -2.1358 | -4.6104 | 0.8230 | 2.4747 | -196.3865 | -219.7114 | -1.4248 | -1.4282 |
70
+ | 0.2649 | 0.4 | 360 | 0.3037 | -3.3073 | -6.0363 | 0.8800 | 2.7290 | -208.1017 | -233.9699 | -1.4411 | -1.4450 |
71
+ | 0.1784 | 0.4667 | 420 | 0.2159 | -3.8934 | -7.0789 | 0.9100 | 3.1855 | -213.9628 | -244.3959 | -1.4470 | -1.4523 |
72
+ | 0.2608 | 0.5333 | 480 | 0.2073 | -3.8076 | -7.8889 | 0.9100 | 4.0813 | -213.1049 | -252.4960 | -1.4509 | -1.4571 |
73
+ | 0.2459 | 0.6 | 540 | 0.2173 | -4.7738 | -9.6025 | 0.8890 | 4.8287 | -222.7667 | -269.6319 | -1.4478 | -1.4529 |
74
+ | 0.1729 | 0.6667 | 600 | 0.2264 | -3.6641 | -9.1186 | 0.9200 | 5.4546 | -211.6696 | -264.7935 | -1.4379 | -1.4430 |
75
+ | 0.2136 | 0.7333 | 660 | 0.1994 | -3.1520 | -8.0180 | 0.9190 | 4.8660 | -206.5491 | -253.7874 | -1.4456 | -1.4518 |
76
+ | 0.2148 | 0.8 | 720 | 0.2623 | -3.3220 | -8.6375 | 0.9040 | 5.3155 | -208.2492 | -259.9820 | -1.4527 | -1.4588 |
77
+ | 0.151 | 0.8667 | 780 | 0.2628 | -3.7843 | -9.3305 | 0.8830 | 5.5462 | -212.8717 | -266.9124 | -1.4556 | -1.4621 |
78
+ | 0.1759 | 0.9333 | 840 | 0.1736 | -3.7518 | -9.3561 | 0.9270 | 5.6043 | -212.5472 | -267.1683 | -1.4565 | -1.4631 |
79
+ | 0.1455 | 1.0 | 900 | 0.1967 | -3.4547 | -10.0926 | 0.9290 | 6.6379 | -209.5764 | -274.5335 | -1.4551 | -1.4625 |
80
+ | 0.1456 | 1.0667 | 960 | 0.2037 | -3.9507 | -10.4184 | 0.9290 | 6.4677 | -214.5359 | -277.7913 | -1.4538 | -1.4610 |
81
+ | 0.1276 | 1.1333 | 1020 | 0.2090 | -3.7958 | -10.3930 | 0.9240 | 6.5972 | -212.9869 | -277.5373 | -1.4494 | -1.4568 |
82
+ | 0.1768 | 1.2 | 1080 | 0.1744 | -3.7397 | -10.8265 | 0.9350 | 7.0868 | -212.4255 | -281.8718 | -1.4487 | -1.4565 |
83
+ | 0.2379 | 1.2667 | 1140 | 0.1679 | -4.2998 | -11.1092 | 0.9260 | 6.8094 | -218.0269 | -284.6993 | -1.4458 | -1.4532 |
84
+ | 0.0571 | 1.3333 | 1200 | 0.1626 | -4.5185 | -12.4102 | 0.9420 | 7.8917 | -220.2143 | -297.7095 | -1.4335 | -1.4415 |
85
+ | 0.1644 | 1.4 | 1260 | 0.1614 | -4.3048 | -12.2288 | 0.9400 | 7.9240 | -218.0764 | -295.8950 | -1.4410 | -1.4497 |
86
+ | 0.3264 | 1.4667 | 1320 | 0.1427 | -4.5696 | -12.5596 | 0.9470 | 7.9900 | -220.7249 | -299.2028 | -1.4390 | -1.4475 |
87
+ | 0.1088 | 1.5333 | 1380 | 0.1382 | -4.6426 | -12.7848 | 0.9510 | 8.1422 | -221.4554 | -301.4557 | -1.4380 | -1.4465 |
88
+ | 0.1853 | 1.6 | 1440 | 0.1417 | -4.9985 | -13.2069 | 0.9490 | 8.2084 | -225.0136 | -305.6761 | -1.4349 | -1.4433 |
89
+ | 0.1406 | 1.6667 | 1500 | 0.1741 | -5.1167 | -13.8396 | 0.9410 | 8.7229 | -226.1956 | -312.0029 | -1.4283 | -1.4373 |
90
+ | 0.1751 | 1.7333 | 1560 | 0.1433 | -4.9687 | -13.7012 | 0.9480 | 8.7325 | -224.7161 | -310.6195 | -1.4309 | -1.4397 |
91
+ | 0.1648 | 1.8 | 1620 | 0.1368 | -4.9785 | -13.6896 | 0.9500 | 8.7111 | -224.8141 | -310.5035 | -1.4335 | -1.4424 |
92
+ | 0.1109 | 1.8667 | 1680 | 0.1367 | -5.0609 | -13.8370 | 0.9480 | 8.7762 | -225.6376 | -311.9777 | -1.4341 | -1.4430 |
93
+ | 0.1875 | 1.9333 | 1740 | 0.1388 | -5.0304 | -13.7910 | 0.9500 | 8.7607 | -225.3328 | -311.5176 | -1.4356 | -1.4445 |
94
+ | 0.0947 | 2.0 | 1800 | 0.1331 | -4.9675 | -13.7312 | 0.9480 | 8.7637 | -224.7040 | -310.9190 | -1.4384 | -1.4474 |
95
+
96
+
97
+ ### Framework versions
98
+
99
+ - PEFT 0.12.0
100
+ - Transformers 4.46.0
101
+ - Pytorch 2.4.0+cu121
102
+ - Datasets 2.21.0
103
+ - Tokenizers 0.20.1
adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "up_proj",
24
+ "gate_proj",
25
+ "o_proj",
26
+ "k_proj",
27
+ "q_proj",
28
+ "down_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e929aec41513f971396ff6eb8d073add48bdab0433a50a2e50924265184fd9
3
+ size 114106856
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_logits/chosen": -1.4384021759033203,
4
+ "eval_logits/rejected": -1.4473795890808105,
5
+ "eval_logps/chosen": -224.70401000976562,
6
+ "eval_logps/rejected": -310.91900634765625,
7
+ "eval_loss": 0.1330825537443161,
8
+ "eval_rewards/accuracies": 0.9479999542236328,
9
+ "eval_rewards/chosen": -4.9675092697143555,
10
+ "eval_rewards/margins": 8.763671875,
11
+ "eval_rewards/rejected": -13.731181144714355,
12
+ "eval_runtime": 318.8194,
13
+ "eval_samples_per_second": 3.137,
14
+ "eval_steps_per_second": 0.314,
15
+ "total_flos": 2.867691724430377e+18,
16
+ "train_loss": 0.25175013176269,
17
+ "train_runtime": 22554.8302,
18
+ "train_samples_per_second": 0.798,
19
+ "train_steps_per_second": 0.08
20
+ }
checkpoint-1000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed to make further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "up_proj",
24
+ "gate_proj",
25
+ "o_proj",
26
+ "k_proj",
27
+ "q_proj",
28
+ "down_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
checkpoint-1000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aadb08af006ffd6a1975cfc34e0638319f9f4a97dfe47633e3cf47b400a87dad
3
+ size 114106856
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ad9e1aa2b915da6f275e0d2cffa8b52dcbb42fcbf7be69897fa49860963c91
3
+ size 228536930
checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:903543db97bc761040c5db0312325334d1969b37903d504a936c7b114834baf1
3
+ size 14512
checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4804f2c10cb0d32144ae27f6945211f7856d030e6cc8a649abdc293875fae83e
3
+ size 14512
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56cd4acfa7c0815efb35ccb1d4ab5a88659d9bd93a8c62776e94b8a963cc1ebd
3
+ size 1064
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-1000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
3
+ size 17078292
checkpoint-1000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,1789 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.1111111111111112,
5
+ "eval_steps": 60,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.011111111111111112,
13
+ "grad_norm": 2.362602949142456,
14
+ "learning_rate": 2.7777777777777776e-07,
15
+ "logits/chosen": -1.400684118270874,
16
+ "logits/rejected": -1.4005341529846191,
17
+ "logps/chosen": -174.8197021484375,
18
+ "logps/rejected": -174.18280029296875,
19
+ "loss": 0.6981,
20
+ "rewards/accuracies": 0.3700000047683716,
21
+ "rewards/chosen": -0.017464280128479004,
22
+ "rewards/margins": -0.00935516320168972,
23
+ "rewards/rejected": -0.00810911599546671,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.022222222222222223,
28
+ "grad_norm": 2.706902027130127,
29
+ "learning_rate": 5.555555555555555e-07,
30
+ "logits/chosen": -1.401512622833252,
31
+ "logits/rejected": -1.4014896154403687,
32
+ "logps/chosen": -172.8441162109375,
33
+ "logps/rejected": -176.39537048339844,
34
+ "loss": 0.6945,
35
+ "rewards/accuracies": 0.4599999785423279,
36
+ "rewards/chosen": -0.015734069049358368,
37
+ "rewards/margins": -0.0022257084492594004,
38
+ "rewards/rejected": -0.01350836269557476,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.03333333333333333,
43
+ "grad_norm": 2.120821714401245,
44
+ "learning_rate": 8.333333333333333e-07,
45
+ "logits/chosen": -1.3998275995254517,
46
+ "logits/rejected": -1.3999087810516357,
47
+ "logps/chosen": -173.80712890625,
48
+ "logps/rejected": -175.36126708984375,
49
+ "loss": 0.6927,
50
+ "rewards/accuracies": 0.5099999904632568,
51
+ "rewards/chosen": -0.00933685339987278,
52
+ "rewards/margins": 0.0013576654018834233,
53
+ "rewards/rejected": -0.010694518685340881,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.044444444444444446,
58
+ "grad_norm": 0.6226487159729004,
59
+ "learning_rate": 1.111111111111111e-06,
60
+ "logits/chosen": -1.4011458158493042,
61
+ "logits/rejected": -1.4012081623077393,
62
+ "logps/chosen": -173.29324340820312,
63
+ "logps/rejected": -175.90345764160156,
64
+ "loss": 0.6926,
65
+ "rewards/accuracies": 0.5099999904632568,
66
+ "rewards/chosen": -0.02281300537288189,
67
+ "rewards/margins": 0.0015505983028560877,
68
+ "rewards/rejected": -0.024363603442907333,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.05555555555555555,
73
+ "grad_norm": 2.68591046333313,
74
+ "learning_rate": 1.3888888888888892e-06,
75
+ "logits/chosen": -1.4008080959320068,
76
+ "logits/rejected": -1.4006825685501099,
77
+ "logps/chosen": -175.80612182617188,
78
+ "logps/rejected": -173.04119873046875,
79
+ "loss": 0.6942,
80
+ "rewards/accuracies": 0.5000000596046448,
81
+ "rewards/chosen": -0.014659256674349308,
82
+ "rewards/margins": -0.0015078135766088963,
83
+ "rewards/rejected": -0.013151444494724274,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.06666666666666667,
88
+ "grad_norm": 0.6941749453544617,
89
+ "learning_rate": 1.6666666666666667e-06,
90
+ "logits/chosen": -1.4003050327301025,
91
+ "logits/rejected": -1.4006407260894775,
92
+ "logps/chosen": -174.0802001953125,
93
+ "logps/rejected": -175.01547241210938,
94
+ "loss": 0.6939,
95
+ "rewards/accuracies": 0.5100000500679016,
96
+ "rewards/chosen": -0.026361756026744843,
97
+ "rewards/margins": -0.0008379966020584106,
98
+ "rewards/rejected": -0.025523759424686432,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.06666666666666667,
103
+ "eval_logits/chosen": -1.4009861946105957,
104
+ "eval_logits/rejected": -1.4008183479309082,
105
+ "eval_logps/chosen": -175.24819946289062,
106
+ "eval_logps/rejected": -173.85289001464844,
107
+ "eval_loss": 0.6920965313911438,
108
+ "eval_rewards/accuracies": 0.5189999938011169,
109
+ "eval_rewards/chosen": -0.021925970911979675,
110
+ "eval_rewards/margins": 0.0026434571482241154,
111
+ "eval_rewards/rejected": -0.024569429457187653,
112
+ "eval_runtime": 318.9511,
113
+ "eval_samples_per_second": 3.135,
114
+ "eval_steps_per_second": 0.314,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.07777777777777778,
119
+ "grad_norm": 1.3399503231048584,
120
+ "learning_rate": 1.944444444444445e-06,
121
+ "logits/chosen": -1.4007337093353271,
122
+ "logits/rejected": -1.4006619453430176,
123
+ "logps/chosen": -173.1317138671875,
124
+ "logps/rejected": -175.83157348632812,
125
+ "loss": 0.6926,
126
+ "rewards/accuracies": 0.5200000405311584,
127
+ "rewards/chosen": -0.02405247837305069,
128
+ "rewards/margins": 0.001808380475267768,
129
+ "rewards/rejected": -0.025860857218503952,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.08888888888888889,
134
+ "grad_norm": 4.030770778656006,
135
+ "learning_rate": 2.222222222222222e-06,
136
+ "logits/chosen": -1.400660753250122,
137
+ "logits/rejected": -1.4007993936538696,
138
+ "logps/chosen": -172.63229370117188,
139
+ "logps/rejected": -176.5906524658203,
140
+ "loss": 0.6849,
141
+ "rewards/accuracies": 0.5900000333786011,
142
+ "rewards/chosen": -0.013674546033143997,
143
+ "rewards/margins": 0.017781419679522514,
144
+ "rewards/rejected": -0.03145596385002136,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 0.1,
149
+ "grad_norm": 7.06594181060791,
150
+ "learning_rate": 2.5e-06,
151
+ "logits/chosen": -1.4002556800842285,
152
+ "logits/rejected": -1.400156021118164,
153
+ "logps/chosen": -176.54403686523438,
154
+ "logps/rejected": -172.20162963867188,
155
+ "loss": 0.6946,
156
+ "rewards/accuracies": 0.44999998807907104,
157
+ "rewards/chosen": -0.01780758798122406,
158
+ "rewards/margins": -0.0011917415540665388,
159
+ "rewards/rejected": -0.016615845263004303,
160
+ "step": 90
161
+ },
162
+ {
163
+ "epoch": 0.1111111111111111,
164
+ "grad_norm": 4.663311004638672,
165
+ "learning_rate": 2.7777777777777783e-06,
166
+ "logits/chosen": -1.40169358253479,
167
+ "logits/rejected": -1.4018887281417847,
168
+ "logps/chosen": -174.86729431152344,
169
+ "logps/rejected": -174.28994750976562,
170
+ "loss": 0.6925,
171
+ "rewards/accuracies": 0.5099999904632568,
172
+ "rewards/chosen": -0.020927399396896362,
173
+ "rewards/margins": 0.002672073431313038,
174
+ "rewards/rejected": -0.023599475622177124,
175
+ "step": 100
176
+ },
177
+ {
178
+ "epoch": 0.12222222222222222,
179
+ "grad_norm": 2.7771716117858887,
180
+ "learning_rate": 3.055555555555556e-06,
181
+ "logits/chosen": -1.4018511772155762,
182
+ "logits/rejected": -1.401686668395996,
183
+ "logps/chosen": -175.4040069580078,
184
+ "logps/rejected": -173.77352905273438,
185
+ "loss": 0.6917,
186
+ "rewards/accuracies": 0.5,
187
+ "rewards/chosen": -0.02241549640893936,
188
+ "rewards/margins": 0.004419571254402399,
189
+ "rewards/rejected": -0.026835069060325623,
190
+ "step": 110
191
+ },
192
+ {
193
+ "epoch": 0.13333333333333333,
194
+ "grad_norm": 2.225400686264038,
195
+ "learning_rate": 3.3333333333333333e-06,
196
+ "logits/chosen": -1.4029500484466553,
197
+ "logits/rejected": -1.4027996063232422,
198
+ "logps/chosen": -175.29742431640625,
199
+ "logps/rejected": -174.22561645507812,
200
+ "loss": 0.6871,
201
+ "rewards/accuracies": 0.5600000023841858,
202
+ "rewards/chosen": -0.03488890081644058,
203
+ "rewards/margins": 0.013112092390656471,
204
+ "rewards/rejected": -0.0480009950697422,
205
+ "step": 120
206
+ },
207
+ {
208
+ "epoch": 0.13333333333333333,
209
+ "eval_logits/chosen": -1.403046727180481,
210
+ "eval_logits/rejected": -1.4029061794281006,
211
+ "eval_logps/chosen": -175.306884765625,
212
+ "eval_logps/rejected": -174.10104370117188,
213
+ "eval_loss": 0.6829859018325806,
214
+ "eval_rewards/accuracies": 0.6079999804496765,
215
+ "eval_rewards/chosen": -0.027797138318419456,
216
+ "eval_rewards/margins": 0.021586475893855095,
217
+ "eval_rewards/rejected": -0.04938361421227455,
218
+ "eval_runtime": 319.5591,
219
+ "eval_samples_per_second": 3.129,
220
+ "eval_steps_per_second": 0.313,
221
+ "step": 120
222
+ },
223
+ {
224
+ "epoch": 0.14444444444444443,
225
+ "grad_norm": 4.428592205047607,
226
+ "learning_rate": 3.6111111111111115e-06,
227
+ "logits/chosen": -1.4035028219223022,
228
+ "logits/rejected": -1.403373122215271,
229
+ "logps/chosen": -175.11550903320312,
230
+ "logps/rejected": -174.84075927734375,
231
+ "loss": 0.6805,
232
+ "rewards/accuracies": 0.6200000643730164,
233
+ "rewards/chosen": -0.05135633796453476,
234
+ "rewards/margins": 0.027625277638435364,
235
+ "rewards/rejected": -0.07898162305355072,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 0.15555555555555556,
240
+ "grad_norm": 1.5452574491500854,
241
+ "learning_rate": 3.88888888888889e-06,
242
+ "logits/chosen": -1.4023932218551636,
243
+ "logits/rejected": -1.402073621749878,
244
+ "logps/chosen": -174.4642791748047,
245
+ "logps/rejected": -176.83168029785156,
246
+ "loss": 0.6804,
247
+ "rewards/accuracies": 0.6299999952316284,
248
+ "rewards/chosen": -0.11876146495342255,
249
+ "rewards/margins": 0.02933622896671295,
250
+ "rewards/rejected": -0.1480976939201355,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.16666666666666666,
255
+ "grad_norm": 0.9253703951835632,
256
+ "learning_rate": 4.166666666666667e-06,
257
+ "logits/chosen": -1.4006946086883545,
258
+ "logits/rejected": -1.400911808013916,
259
+ "logps/chosen": -176.50845336914062,
260
+ "logps/rejected": -175.89736938476562,
261
+ "loss": 0.6765,
262
+ "rewards/accuracies": 0.6399999856948853,
263
+ "rewards/chosen": -0.17112146317958832,
264
+ "rewards/margins": 0.03798893839120865,
265
+ "rewards/rejected": -0.20911039412021637,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.17777777777777778,
270
+ "grad_norm": 4.935380935668945,
271
+ "learning_rate": 4.444444444444444e-06,
272
+ "logits/chosen": -1.399414300918579,
273
+ "logits/rejected": -1.399838924407959,
274
+ "logps/chosen": -176.39724731445312,
275
+ "logps/rejected": -178.42300415039062,
276
+ "loss": 0.6537,
277
+ "rewards/accuracies": 0.7100000381469727,
278
+ "rewards/chosen": -0.25529032945632935,
279
+ "rewards/margins": 0.08817656338214874,
280
+ "rewards/rejected": -0.3434668779373169,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 0.18888888888888888,
285
+ "grad_norm": 1.3383221626281738,
286
+ "learning_rate": 4.722222222222222e-06,
287
+ "logits/chosen": -1.3981242179870605,
288
+ "logits/rejected": -1.398409128189087,
289
+ "logps/chosen": -179.46847534179688,
290
+ "logps/rejected": -177.86688232421875,
291
+ "loss": 0.684,
292
+ "rewards/accuracies": 0.550000011920929,
293
+ "rewards/chosen": -0.40547820925712585,
294
+ "rewards/margins": 0.04951518028974533,
295
+ "rewards/rejected": -0.4549933969974518,
296
+ "step": 170
297
+ },
298
+ {
299
+ "epoch": 0.2,
300
+ "grad_norm": 6.545588493347168,
301
+ "learning_rate": 5e-06,
302
+ "logits/chosen": -1.3984978199005127,
303
+ "logits/rejected": -1.3985638618469238,
304
+ "logps/chosen": -180.9668426513672,
305
+ "logps/rejected": -178.43746948242188,
306
+ "loss": 0.6159,
307
+ "rewards/accuracies": 0.7599999904632568,
308
+ "rewards/chosen": -0.4513840079307556,
309
+ "rewards/margins": 0.18196940422058105,
310
+ "rewards/rejected": -0.6333533525466919,
311
+ "step": 180
312
+ },
313
+ {
314
+ "epoch": 0.2,
315
+ "eval_logits/chosen": -1.4020743370056152,
316
+ "eval_logits/rejected": -1.402461051940918,
317
+ "eval_logps/chosen": -180.4278564453125,
318
+ "eval_logps/rejected": -180.83172607421875,
319
+ "eval_loss": 0.6382298469543457,
320
+ "eval_rewards/accuracies": 0.5610000491142273,
321
+ "eval_rewards/chosen": -0.5398944616317749,
322
+ "eval_rewards/margins": 0.18255746364593506,
323
+ "eval_rewards/rejected": -0.72245192527771,
324
+ "eval_runtime": 319.2836,
325
+ "eval_samples_per_second": 3.132,
326
+ "eval_steps_per_second": 0.313,
327
+ "step": 180
328
+ },
329
+ {
330
+ "epoch": 0.2111111111111111,
331
+ "grad_norm": 2.0203661918640137,
332
+ "learning_rate": 4.999529926121254e-06,
333
+ "logits/chosen": -1.396078109741211,
334
+ "logits/rejected": -1.3954544067382812,
335
+ "logps/chosen": -180.74969482421875,
336
+ "logps/rejected": -182.64613342285156,
337
+ "loss": 0.6337,
338
+ "rewards/accuracies": 0.5700000524520874,
339
+ "rewards/chosen": -0.6385375261306763,
340
+ "rewards/margins": 0.19739526510238647,
341
+ "rewards/rejected": -0.8359327912330627,
342
+ "step": 190
343
+ },
344
+ {
345
+ "epoch": 0.2222222222222222,
346
+ "grad_norm": 5.894029140472412,
347
+ "learning_rate": 4.998119881260576e-06,
348
+ "logits/chosen": -1.390157699584961,
349
+ "logits/rejected": -1.3912606239318848,
350
+ "logps/chosen": -181.57754516601562,
351
+ "logps/rejected": -183.00576782226562,
352
+ "loss": 0.5749,
353
+ "rewards/accuracies": 0.8199999928474426,
354
+ "rewards/chosen": -0.6484101414680481,
355
+ "rewards/margins": 0.3046451807022095,
356
+ "rewards/rejected": -0.9530552625656128,
357
+ "step": 200
358
+ },
359
+ {
360
+ "epoch": 0.23333333333333334,
361
+ "grad_norm": 4.795431613922119,
362
+ "learning_rate": 4.995770395678171e-06,
363
+ "logits/chosen": -1.390209436416626,
364
+ "logits/rejected": -1.3919038772583008,
365
+ "logps/chosen": -181.8658447265625,
366
+ "logps/rejected": -183.79417419433594,
367
+ "loss": 0.5556,
368
+ "rewards/accuracies": 0.75,
369
+ "rewards/chosen": -0.6416223049163818,
370
+ "rewards/margins": 0.4046136736869812,
371
+ "rewards/rejected": -1.0462360382080078,
372
+ "step": 210
373
+ },
374
+ {
375
+ "epoch": 0.24444444444444444,
376
+ "grad_norm": 8.91357421875,
377
+ "learning_rate": 4.99248235291948e-06,
378
+ "logits/chosen": -1.3888887166976929,
379
+ "logits/rejected": -1.3894532918930054,
380
+ "logps/chosen": -179.56829833984375,
381
+ "logps/rejected": -189.20083618164062,
382
+ "loss": 0.4952,
383
+ "rewards/accuracies": 0.800000011920929,
384
+ "rewards/chosen": -0.6512977480888367,
385
+ "rewards/margins": 0.7006000876426697,
386
+ "rewards/rejected": -1.3518978357315063,
387
+ "step": 220
388
+ },
389
+ {
390
+ "epoch": 0.25555555555555554,
391
+ "grad_norm": 14.271614074707031,
392
+ "learning_rate": 4.9882569894829146e-06,
393
+ "logits/chosen": -1.3921380043029785,
394
+ "logits/rejected": -1.393751859664917,
395
+ "logps/chosen": -185.2764892578125,
396
+ "logps/rejected": -192.3001708984375,
397
+ "loss": 0.5098,
398
+ "rewards/accuracies": 0.7599999904632568,
399
+ "rewards/chosen": -1.0151185989379883,
400
+ "rewards/margins": 0.8646041750907898,
401
+ "rewards/rejected": -1.8797227144241333,
402
+ "step": 230
403
+ },
404
+ {
405
+ "epoch": 0.26666666666666666,
406
+ "grad_norm": 2.420156240463257,
407
+ "learning_rate": 4.983095894354858e-06,
408
+ "logits/chosen": -1.39105224609375,
409
+ "logits/rejected": -1.392564296722412,
410
+ "logps/chosen": -186.03451538085938,
411
+ "logps/rejected": -201.23435974121094,
412
+ "loss": 0.368,
413
+ "rewards/accuracies": 0.8300000429153442,
414
+ "rewards/chosen": -1.2737812995910645,
415
+ "rewards/margins": 1.303347110748291,
416
+ "rewards/rejected": -2.5771284103393555,
417
+ "step": 240
418
+ },
419
+ {
420
+ "epoch": 0.26666666666666666,
421
+ "eval_logits/chosen": -1.3971052169799805,
422
+ "eval_logits/rejected": -1.3996238708496094,
423
+ "eval_logps/chosen": -188.56735229492188,
424
+ "eval_logps/rejected": -201.0563201904297,
425
+ "eval_loss": 0.3848608434200287,
426
+ "eval_rewards/accuracies": 0.8309999704360962,
427
+ "eval_rewards/chosen": -1.3538421392440796,
428
+ "eval_rewards/margins": 1.3910682201385498,
429
+ "eval_rewards/rejected": -2.74491024017334,
430
+ "eval_runtime": 319.0097,
431
+ "eval_samples_per_second": 3.135,
432
+ "eval_steps_per_second": 0.313,
433
+ "step": 240
434
+ },
435
+ {
436
+ "epoch": 0.2777777777777778,
437
+ "grad_norm": 14.02056884765625,
438
+ "learning_rate": 4.977001008412113e-06,
439
+ "logits/chosen": -1.3970434665679932,
440
+ "logits/rejected": -1.400298833847046,
441
+ "logps/chosen": -185.9792022705078,
442
+ "logps/rejected": -203.23114013671875,
443
+ "loss": 0.324,
444
+ "rewards/accuracies": 0.8600000143051147,
445
+ "rewards/chosen": -1.1137562990188599,
446
+ "rewards/margins": 1.8328487873077393,
447
+ "rewards/rejected": -2.9466049671173096,
448
+ "step": 250
449
+ },
450
+ {
451
+ "epoch": 0.28888888888888886,
452
+ "grad_norm": 3.589820146560669,
453
+ "learning_rate": 4.969974623692023e-06,
454
+ "logits/chosen": -1.4056309461593628,
455
+ "logits/rejected": -1.4085218906402588,
456
+ "logps/chosen": -185.17918395996094,
457
+ "logps/rejected": -209.30335998535156,
458
+ "loss": 0.2772,
459
+ "rewards/accuracies": 0.8800000548362732,
460
+ "rewards/chosen": -1.051544189453125,
461
+ "rewards/margins": 2.4677376747131348,
462
+ "rewards/rejected": -3.5192818641662598,
463
+ "step": 260
464
+ },
465
+ {
466
+ "epoch": 0.3,
467
+ "grad_norm": 4.202933311462402,
468
+ "learning_rate": 4.962019382530521e-06,
469
+ "logits/chosen": -1.4178866147994995,
470
+ "logits/rejected": -1.4198402166366577,
471
+ "logps/chosen": -191.2581329345703,
472
+ "logps/rejected": -217.56085205078125,
473
+ "loss": 0.2959,
474
+ "rewards/accuracies": 0.8700000047683716,
475
+ "rewards/chosen": -1.7226934432983398,
476
+ "rewards/margins": 2.5767905712127686,
477
+ "rewards/rejected": -4.2994842529296875,
478
+ "step": 270
479
+ },
480
+ {
481
+ "epoch": 0.3111111111111111,
482
+ "grad_norm": 4.351930141448975,
483
+ "learning_rate": 4.953138276568462e-06,
484
+ "logits/chosen": -1.4250727891921997,
485
+ "logits/rejected": -1.427567720413208,
486
+ "logps/chosen": -200.7665557861328,
487
+ "logps/rejected": -221.02357482910156,
488
+ "loss": 0.4344,
489
+ "rewards/accuracies": 0.7900000214576721,
490
+ "rewards/chosen": -2.566577434539795,
491
+ "rewards/margins": 2.1860404014587402,
492
+ "rewards/rejected": -4.752617835998535,
493
+ "step": 280
494
+ },
495
+ {
496
+ "epoch": 0.32222222222222224,
497
+ "grad_norm": 9.703364372253418,
498
+ "learning_rate": 4.943334645626589e-06,
499
+ "logits/chosen": -1.4243228435516357,
500
+ "logits/rejected": -1.4278262853622437,
501
+ "logps/chosen": -197.0714111328125,
502
+ "logps/rejected": -221.6966552734375,
503
+ "loss": 0.3466,
504
+ "rewards/accuracies": 0.8199999928474426,
505
+ "rewards/chosen": -2.2351460456848145,
506
+ "rewards/margins": 2.5265071392059326,
507
+ "rewards/rejected": -4.761653900146484,
508
+ "step": 290
509
+ },
510
+ {
511
+ "epoch": 0.3333333333333333,
512
+ "grad_norm": 14.332489967346191,
513
+ "learning_rate": 4.93261217644956e-06,
514
+ "logits/chosen": -1.4260220527648926,
515
+ "logits/rejected": -1.4290738105773926,
516
+ "logps/chosen": -194.31724548339844,
517
+ "logps/rejected": -221.0859832763672,
518
+ "loss": 0.3234,
519
+ "rewards/accuracies": 0.8800000548362732,
520
+ "rewards/chosen": -2.019387722015381,
521
+ "rewards/margins": 2.6289873123168945,
522
+ "rewards/rejected": -4.648375034332275,
523
+ "step": 300
524
+ },
525
+ {
526
+ "epoch": 0.3333333333333333,
527
+ "eval_logits/chosen": -1.4247881174087524,
528
+ "eval_logits/rejected": -1.4282124042510986,
529
+ "eval_logps/chosen": -196.38650512695312,
530
+ "eval_logps/rejected": -219.71144104003906,
531
+ "eval_loss": 0.3633359372615814,
532
+ "eval_rewards/accuracies": 0.8229999542236328,
533
+ "eval_rewards/chosen": -2.135758876800537,
534
+ "eval_rewards/margins": 2.4746649265289307,
535
+ "eval_rewards/rejected": -4.610424041748047,
536
+ "eval_runtime": 319.0479,
537
+ "eval_samples_per_second": 3.134,
538
+ "eval_steps_per_second": 0.313,
539
+ "step": 300
540
+ },
541
+ {
542
+ "epoch": 0.34444444444444444,
543
+ "grad_norm": 26.149131774902344,
544
+ "learning_rate": 4.9209749013195155e-06,
545
+ "logits/chosen": -1.4286975860595703,
546
+ "logits/rejected": -1.43110990524292,
547
+ "logps/chosen": -191.86825561523438,
548
+ "logps/rejected": -218.36767578125,
549
+ "loss": 0.3799,
550
+ "rewards/accuracies": 0.8300000429153442,
551
+ "rewards/chosen": -1.8485496044158936,
552
+ "rewards/margins": 2.443417549133301,
553
+ "rewards/rejected": -4.291967391967773,
554
+ "step": 310
555
+ },
556
+ {
557
+ "epoch": 0.35555555555555557,
558
+ "grad_norm": 18.254680633544922,
559
+ "learning_rate": 4.908427196539701e-06,
560
+ "logits/chosen": -1.4264110326766968,
561
+ "logits/rejected": -1.4311984777450562,
562
+ "logps/chosen": -196.54238891601562,
563
+ "logps/rejected": -215.0438232421875,
564
+ "loss": 0.3149,
565
+ "rewards/accuracies": 0.8399999737739563,
566
+ "rewards/chosen": -2.009295701980591,
567
+ "rewards/margins": 2.2745771408081055,
568
+ "rewards/rejected": -4.283872604370117,
569
+ "step": 320
570
+ },
571
+ {
572
+ "epoch": 0.36666666666666664,
573
+ "grad_norm": 20.668800354003906,
574
+ "learning_rate": 4.894973780788722e-06,
575
+ "logits/chosen": -1.4264931678771973,
576
+ "logits/rejected": -1.4278137683868408,
577
+ "logps/chosen": -198.57382202148438,
578
+ "logps/rejected": -217.05438232421875,
579
+ "loss": 0.4159,
580
+ "rewards/accuracies": 0.8100000023841858,
581
+ "rewards/chosen": -2.5315957069396973,
582
+ "rewards/margins": 1.5949325561523438,
583
+ "rewards/rejected": -4.126528739929199,
584
+ "step": 330
585
+ },
586
+ {
587
+ "epoch": 0.37777777777777777,
588
+ "grad_norm": 4.467871189117432,
589
+ "learning_rate": 4.8806197133460385e-06,
590
+ "logits/chosen": -1.4277429580688477,
591
+ "logits/rejected": -1.4302550554275513,
592
+ "logps/chosen": -204.53775024414062,
593
+ "logps/rejected": -220.16055297851562,
594
+ "loss": 0.3476,
595
+ "rewards/accuracies": 0.8499999642372131,
596
+ "rewards/chosen": -2.966139316558838,
597
+ "rewards/margins": 1.6775035858154297,
598
+ "rewards/rejected": -4.643642425537109,
599
+ "step": 340
600
+ },
601
+ {
602
+ "epoch": 0.3888888888888889,
603
+ "grad_norm": 7.6644816398620605,
604
+ "learning_rate": 4.865370392189377e-06,
605
+ "logits/chosen": -1.43019437789917,
606
+ "logits/rejected": -1.4324309825897217,
607
+ "logps/chosen": -203.60850524902344,
608
+ "logps/rejected": -224.7152862548828,
609
+ "loss": 0.2798,
610
+ "rewards/accuracies": 0.8700000047683716,
611
+ "rewards/chosen": -2.942948818206787,
612
+ "rewards/margins": 2.063199996948242,
613
+ "rewards/rejected": -5.006148338317871,
614
+ "step": 350
615
+ },
616
+ {
617
+ "epoch": 0.4,
618
+ "grad_norm": 13.925436019897461,
619
+ "learning_rate": 4.849231551964771e-06,
620
+ "logits/chosen": -1.4413893222808838,
621
+ "logits/rejected": -1.4455211162567139,
622
+ "logps/chosen": -205.908447265625,
623
+ "logps/rejected": -226.8455810546875,
624
+ "loss": 0.2649,
625
+ "rewards/accuracies": 0.8700000643730164,
626
+ "rewards/chosen": -3.027750253677368,
627
+ "rewards/margins": 2.361143112182617,
628
+ "rewards/rejected": -5.388893127441406,
629
+ "step": 360
630
+ },
631
+ {
632
+ "epoch": 0.4,
633
+ "eval_logits/chosen": -1.4411193132400513,
634
+ "eval_logits/rejected": -1.4450273513793945,
635
+ "eval_logps/chosen": -208.10166931152344,
636
+ "eval_logps/rejected": -233.96986389160156,
637
+ "eval_loss": 0.3037000298500061,
638
+ "eval_rewards/accuracies": 0.8799999952316284,
639
+ "eval_rewards/chosen": -3.3072755336761475,
640
+ "eval_rewards/margins": 2.7289905548095703,
641
+ "eval_rewards/rejected": -6.036265850067139,
642
+ "eval_runtime": 318.9985,
643
+ "eval_samples_per_second": 3.135,
644
+ "eval_steps_per_second": 0.313,
645
+ "step": 360
646
+ },
647
+ {
648
+ "epoch": 0.4111111111111111,
649
+ "grad_norm": 4.158270835876465,
650
+ "learning_rate": 4.832209261830002e-06,
651
+ "logits/chosen": -1.4424656629562378,
652
+ "logits/rejected": -1.4435977935791016,
653
+ "logps/chosen": -208.0481414794922,
654
+ "logps/rejected": -236.48324584960938,
655
+ "loss": 0.353,
656
+ "rewards/accuracies": 0.8700000047683716,
657
+ "rewards/chosen": -3.5577611923217773,
658
+ "rewards/margins": 2.4773597717285156,
659
+ "rewards/rejected": -6.035120964050293,
660
+ "step": 370
661
+ },
662
+ {
663
+ "epoch": 0.4222222222222222,
664
+ "grad_norm": 3.5422561168670654,
665
+ "learning_rate": 4.814309923172227e-06,
666
+ "logits/chosen": -1.4452104568481445,
667
+ "logits/rejected": -1.4488377571105957,
668
+ "logps/chosen": -204.97947692871094,
669
+ "logps/rejected": -231.20712280273438,
670
+ "loss": 0.3429,
671
+ "rewards/accuracies": 0.8300000429153442,
672
+ "rewards/chosen": -3.058133125305176,
673
+ "rewards/margins": 2.625974178314209,
674
+ "rewards/rejected": -5.684107780456543,
675
+ "step": 380
676
+ },
677
+ {
678
+ "epoch": 0.43333333333333335,
679
+ "grad_norm": 16.114534378051758,
680
+ "learning_rate": 4.7955402672006855e-06,
681
+ "logits/chosen": -1.440530776977539,
682
+ "logits/rejected": -1.4443151950836182,
683
+ "logps/chosen": -205.27835083007812,
684
+ "logps/rejected": -236.82347106933594,
685
+ "loss": 0.2045,
686
+ "rewards/accuracies": 0.9300000071525574,
687
+ "rewards/chosen": -3.113431215286255,
688
+ "rewards/margins": 3.1173110008239746,
689
+ "rewards/rejected": -6.23074197769165,
690
+ "step": 390
691
+ },
692
+ {
693
+ "epoch": 0.4444444444444444,
694
+ "grad_norm": 22.146488189697266,
695
+ "learning_rate": 4.775907352415367e-06,
696
+ "logits/chosen": -1.4472781419754028,
697
+ "logits/rejected": -1.4499727487564087,
698
+ "logps/chosen": -199.02243041992188,
699
+ "logps/rejected": -242.6939697265625,
700
+ "loss": 0.2361,
701
+ "rewards/accuracies": 0.9100000262260437,
702
+ "rewards/chosen": -2.694483757019043,
703
+ "rewards/margins": 3.9368107318878174,
704
+ "rewards/rejected": -6.631294250488281,
705
+ "step": 400
706
+ },
707
+ {
708
+ "epoch": 0.45555555555555555,
709
+ "grad_norm": 16.819496154785156,
710
+ "learning_rate": 4.755418561952595e-06,
711
+ "logits/chosen": -1.4456830024719238,
712
+ "logits/rejected": -1.4492114782333374,
713
+ "logps/chosen": -207.8698272705078,
714
+ "logps/rejected": -238.99583435058594,
715
+ "loss": 0.2863,
716
+ "rewards/accuracies": 0.9100000858306885,
717
+ "rewards/chosen": -3.3276515007019043,
718
+ "rewards/margins": 3.1782994270324707,
719
+ "rewards/rejected": -6.505950927734375,
720
+ "step": 410
721
+ },
722
+ {
723
+ "epoch": 0.4666666666666667,
724
+ "grad_norm": 15.385212898254395,
725
+ "learning_rate": 4.734081600808531e-06,
726
+ "logits/chosen": -1.448960542678833,
727
+ "logits/rejected": -1.4532960653305054,
728
+ "logps/chosen": -210.46075439453125,
729
+ "logps/rejected": -245.5928955078125,
730
+ "loss": 0.1784,
731
+ "rewards/accuracies": 0.9300000667572021,
732
+ "rewards/chosen": -3.5726406574249268,
733
+ "rewards/margins": 3.5739850997924805,
734
+ "rewards/rejected": -7.146625995635986,
735
+ "step": 420
736
+ },
737
+ {
738
+ "epoch": 0.4666666666666667,
739
+ "eval_logits/chosen": -1.4469826221466064,
740
+ "eval_logits/rejected": -1.4523011445999146,
741
+ "eval_logps/chosen": -213.9627685546875,
742
+ "eval_logps/rejected": -244.39593505859375,
743
+ "eval_loss": 0.2159292995929718,
744
+ "eval_rewards/accuracies": 0.9099999666213989,
745
+ "eval_rewards/chosen": -3.8933866024017334,
746
+ "eval_rewards/margins": 3.185485601425171,
747
+ "eval_rewards/rejected": -7.078872203826904,
748
+ "eval_runtime": 319.0594,
749
+ "eval_samples_per_second": 3.134,
750
+ "eval_steps_per_second": 0.313,
751
+ "step": 420
752
+ },
753
+ {
754
+ "epoch": 0.4777777777777778,
755
+ "grad_norm": 35.55814743041992,
756
+ "learning_rate": 4.711904492941644e-06,
757
+ "logits/chosen": -1.4515868425369263,
758
+ "logits/rejected": -1.4541680812835693,
759
+ "logps/chosen": -207.43453979492188,
760
+ "logps/rejected": -246.10247802734375,
761
+ "loss": 0.2279,
762
+ "rewards/accuracies": 0.9099999666213989,
763
+ "rewards/chosen": -3.5352389812469482,
764
+ "rewards/margins": 3.39831805229187,
765
+ "rewards/rejected": -6.933557033538818,
766
+ "step": 430
767
+ },
768
+ {
769
+ "epoch": 0.4888888888888889,
770
+ "grad_norm": 18.41891098022461,
771
+ "learning_rate": 4.688895578255228e-06,
772
+ "logits/chosen": -1.4477709531784058,
773
+ "logits/rejected": -1.4553776979446411,
774
+ "logps/chosen": -215.75033569335938,
775
+ "logps/rejected": -245.9658203125,
776
+ "loss": 0.2779,
777
+ "rewards/accuracies": 0.8600000143051147,
778
+ "rewards/chosen": -3.823634147644043,
779
+ "rewards/margins": 3.665213108062744,
780
+ "rewards/rejected": -7.488846778869629,
781
+ "step": 440
782
+ },
783
+ {
784
+ "epoch": 0.5,
785
+ "grad_norm": 15.392614364624023,
786
+ "learning_rate": 4.665063509461098e-06,
787
+ "logits/chosen": -1.4473040103912354,
788
+ "logits/rejected": -1.4520621299743652,
789
+ "logps/chosen": -212.28256225585938,
790
+ "logps/rejected": -245.33755493164062,
791
+ "loss": 0.2924,
792
+ "rewards/accuracies": 0.89000004529953,
793
+ "rewards/chosen": -3.778430461883545,
794
+ "rewards/margins": 3.308140277862549,
795
+ "rewards/rejected": -7.086570739746094,
796
+ "step": 450
797
+ },
798
+ {
799
+ "epoch": 0.5111111111111111,
800
+ "grad_norm": 19.698705673217773,
801
+ "learning_rate": 4.640417248825667e-06,
802
+ "logits/chosen": -1.4431393146514893,
803
+ "logits/rejected": -1.4465763568878174,
804
+ "logps/chosen": -209.155517578125,
805
+ "logps/rejected": -247.68649291992188,
806
+ "loss": 0.1966,
807
+ "rewards/accuracies": 0.9200000166893005,
808
+ "rewards/chosen": -3.5632858276367188,
809
+ "rewards/margins": 3.6926655769348145,
810
+ "rewards/rejected": -7.255951404571533,
811
+ "step": 460
812
+ },
813
+ {
814
+ "epoch": 0.5222222222222223,
815
+ "grad_norm": 3.17411208152771,
816
+ "learning_rate": 4.614966064799603e-06,
817
+ "logits/chosen": -1.4454569816589355,
818
+ "logits/rejected": -1.4508020877838135,
819
+ "logps/chosen": -214.06642150878906,
820
+ "logps/rejected": -249.29022216796875,
821
+ "loss": 0.1576,
822
+ "rewards/accuracies": 0.9399999976158142,
823
+ "rewards/chosen": -3.891676902770996,
824
+ "rewards/margins": 3.6903645992279053,
825
+ "rewards/rejected": -7.5820417404174805,
826
+ "step": 470
827
+ },
828
+ {
829
+ "epoch": 0.5333333333333333,
830
+ "grad_norm": 3.511045455932617,
831
+ "learning_rate": 4.588719528532342e-06,
832
+ "logits/chosen": -1.4526777267456055,
833
+ "logits/rejected": -1.4565974473953247,
834
+ "logps/chosen": -209.6256103515625,
835
+ "logps/rejected": -252.88116455078125,
836
+ "loss": 0.2608,
837
+ "rewards/accuracies": 0.8700000643730164,
838
+ "rewards/chosen": -3.6902856826782227,
839
+ "rewards/margins": 4.007488250732422,
840
+ "rewards/rejected": -7.6977739334106445,
841
+ "step": 480
842
+ },
843
+ {
844
+ "epoch": 0.5333333333333333,
845
+ "eval_logits/chosen": -1.4509010314941406,
846
+ "eval_logits/rejected": -1.4571257829666138,
847
+ "eval_logps/chosen": -213.10494995117188,
848
+ "eval_logps/rejected": -252.49603271484375,
849
+ "eval_loss": 0.20726382732391357,
850
+ "eval_rewards/accuracies": 0.9099999666213989,
851
+ "eval_rewards/chosen": -3.8076045513153076,
852
+ "eval_rewards/margins": 4.0812788009643555,
853
+ "eval_rewards/rejected": -7.888883590698242,
854
+ "eval_runtime": 319.0436,
855
+ "eval_samples_per_second": 3.134,
856
+ "eval_steps_per_second": 0.313,
857
+ "step": 480
858
+ },
859
+ {
860
+ "epoch": 0.5444444444444444,
861
+ "grad_norm": 35.65738296508789,
862
+ "learning_rate": 4.561687510272767e-06,
863
+ "logits/chosen": -1.4541469812393188,
864
+ "logits/rejected": -1.4597184658050537,
865
+ "logps/chosen": -213.66517639160156,
866
+ "logps/rejected": -254.37350463867188,
867
+ "loss": 0.2904,
868
+ "rewards/accuracies": 0.8899999856948853,
869
+ "rewards/chosen": -4.016324043273926,
870
+ "rewards/margins": 3.9200973510742188,
871
+ "rewards/rejected": -7.9364213943481445,
872
+ "step": 490
873
+ },
874
+ {
875
+ "epoch": 0.5555555555555556,
876
+ "grad_norm": 15.376676559448242,
877
+ "learning_rate": 4.533880175657419e-06,
878
+ "logits/chosen": -1.4524576663970947,
879
+ "logits/rejected": -1.4585695266723633,
880
+ "logps/chosen": -218.01429748535156,
881
+ "logps/rejected": -257.30328369140625,
882
+ "loss": 0.2261,
883
+ "rewards/accuracies": 0.9100000262260437,
884
+ "rewards/chosen": -4.428624153137207,
885
+ "rewards/margins": 3.822225332260132,
886
+ "rewards/rejected": -8.250848770141602,
887
+ "step": 500
888
+ },
889
+ {
890
+ "epoch": 0.5666666666666667,
891
+ "grad_norm": 25.499267578125,
892
+ "learning_rate": 4.50530798188761e-06,
893
+ "logits/chosen": -1.451499342918396,
894
+ "logits/rejected": -1.4615750312805176,
895
+ "logps/chosen": -223.37664794921875,
896
+ "logps/rejected": -253.57177734375,
897
+ "loss": 0.2516,
898
+ "rewards/accuracies": 0.9000000357627869,
899
+ "rewards/chosen": -4.594554424285889,
900
+ "rewards/margins": 3.6208624839782715,
901
+ "rewards/rejected": -8.215417861938477,
902
+ "step": 510
903
+ },
904
+ {
905
+ "epoch": 0.5777777777777777,
906
+ "grad_norm": 42.641754150390625,
907
+ "learning_rate": 4.475981673796899e-06,
908
+ "logits/chosen": -1.4456167221069336,
909
+ "logits/rejected": -1.4504668712615967,
910
+ "logps/chosen": -213.45851135253906,
911
+ "logps/rejected": -259.6695251464844,
912
+ "loss": 0.2521,
913
+ "rewards/accuracies": 0.9200000762939453,
914
+ "rewards/chosen": -4.051717281341553,
915
+ "rewards/margins": 4.357028484344482,
916
+ "rewards/rejected": -8.408745765686035,
917
+ "step": 520
918
+ },
919
+ {
920
+ "epoch": 0.5888888888888889,
921
+ "grad_norm": 26.318056106567383,
922
+ "learning_rate": 4.445912279810401e-06,
923
+ "logits/chosen": -1.4452048540115356,
924
+ "logits/rejected": -1.4490594863891602,
925
+ "logps/chosen": -211.29248046875,
926
+ "logps/rejected": -264.21600341796875,
927
+ "loss": 0.2038,
928
+ "rewards/accuracies": 0.9000000357627869,
929
+ "rewards/chosen": -3.8537445068359375,
930
+ "rewards/margins": 4.980400085449219,
931
+ "rewards/rejected": -8.834144592285156,
932
+ "step": 530
933
+ },
934
+ {
935
+ "epoch": 0.6,
936
+ "grad_norm": 46.37030792236328,
937
+ "learning_rate": 4.415111107797445e-06,
938
+ "logits/chosen": -1.4452967643737793,
939
+ "logits/rejected": -1.448035478591919,
940
+ "logps/chosen": -221.65042114257812,
941
+ "logps/rejected": -268.7168273925781,
942
+ "loss": 0.2459,
943
+ "rewards/accuracies": 0.8399999737739563,
944
+ "rewards/chosen": -4.855015754699707,
945
+ "rewards/margins": 4.4556379318237305,
946
+ "rewards/rejected": -9.310652732849121,
947
+ "step": 540
948
+ },
949
+ {
950
+ "epoch": 0.6,
951
+ "eval_logits/chosen": -1.4478332996368408,
952
+ "eval_logits/rejected": -1.4528884887695312,
953
+ "eval_logps/chosen": -222.76666259765625,
954
+ "eval_logps/rejected": -269.6318664550781,
955
+ "eval_loss": 0.21725089848041534,
956
+ "eval_rewards/accuracies": 0.8889999389648438,
957
+ "eval_rewards/chosen": -4.773774147033691,
958
+ "eval_rewards/margins": 4.828692436218262,
959
+ "eval_rewards/rejected": -9.602466583251953,
960
+ "eval_runtime": 319.0307,
961
+ "eval_samples_per_second": 3.134,
962
+ "eval_steps_per_second": 0.313,
963
+ "step": 540
964
+ },
965
+ {
966
+ "epoch": 0.6111111111111112,
967
+ "grad_norm": 37.16395568847656,
968
+ "learning_rate": 4.3835897408191515e-06,
969
+ "logits/chosen": -1.450826644897461,
970
+ "logits/rejected": -1.4534823894500732,
971
+ "logps/chosen": -222.22439575195312,
972
+ "logps/rejected": -270.947998046875,
973
+ "loss": 0.1905,
974
+ "rewards/accuracies": 0.9300000071525574,
975
+ "rewards/chosen": -4.919099807739258,
976
+ "rewards/margins": 4.604528427124023,
977
+ "rewards/rejected": -9.523628234863281,
978
+ "step": 550
979
+ },
980
+ {
981
+ "epoch": 0.6222222222222222,
982
+ "grad_norm": 26.3408260345459,
983
+ "learning_rate": 4.351360032772512e-06,
984
+ "logits/chosen": -1.4518877267837524,
985
+ "logits/rejected": -1.4572858810424805,
986
+ "logps/chosen": -215.63409423828125,
987
+ "logps/rejected": -271.2196044921875,
988
+ "loss": 0.1935,
989
+ "rewards/accuracies": 0.940000057220459,
990
+ "rewards/chosen": -4.156116962432861,
991
+ "rewards/margins": 5.512393951416016,
992
+ "rewards/rejected": -9.668511390686035,
993
+ "step": 560
994
+ },
995
+ {
996
+ "epoch": 0.6333333333333333,
997
+ "grad_norm": 30.472354888916016,
998
+ "learning_rate": 4.318434103932622e-06,
999
+ "logits/chosen": -1.4471065998077393,
1000
+ "logits/rejected": -1.45332932472229,
1001
+ "logps/chosen": -217.19085693359375,
1002
+ "logps/rejected": -264.91046142578125,
1003
+ "loss": 0.3623,
1004
+ "rewards/accuracies": 0.8700000047683716,
1005
+ "rewards/chosen": -4.126136779785156,
1006
+ "rewards/margins": 5.096201419830322,
1007
+ "rewards/rejected": -9.22233772277832,
1008
+ "step": 570
1009
+ },
1010
+ {
1011
+ "epoch": 0.6444444444444445,
1012
+ "grad_norm": 17.42032814025879,
1013
+ "learning_rate": 4.284824336394748e-06,
1014
+ "logits/chosen": -1.4501465559005737,
1015
+ "logits/rejected": -1.4535834789276123,
1016
+ "logps/chosen": -216.29188537597656,
1017
+ "logps/rejected": -262.982421875,
1018
+ "loss": 0.2146,
1019
+ "rewards/accuracies": 0.9100000262260437,
1020
+ "rewards/chosen": -4.250003814697266,
1021
+ "rewards/margins": 4.562039852142334,
1022
+ "rewards/rejected": -8.812044143676758,
1023
+ "step": 580
1024
+ },
1025
+ {
1026
+ "epoch": 0.6555555555555556,
1027
+ "grad_norm": 8.025737762451172,
1028
+ "learning_rate": 4.250543369417921e-06,
1029
+ "logits/chosen": -1.4417762756347656,
1030
+ "logits/rejected": -1.445784568786621,
1031
+ "logps/chosen": -210.0897216796875,
1032
+ "logps/rejected": -259.0534973144531,
1033
+ "loss": 0.2008,
1034
+ "rewards/accuracies": 0.9000000357627869,
1035
+ "rewards/chosen": -3.6182351112365723,
1036
+ "rewards/margins": 4.80393123626709,
1037
+ "rewards/rejected": -8.42216682434082,
1038
+ "step": 590
1039
+ },
1040
+ {
1041
+ "epoch": 0.6666666666666666,
1042
+ "grad_norm": 47.6915397644043,
1043
+ "learning_rate": 4.215604094671835e-06,
1044
+ "logits/chosen": -1.4405059814453125,
1045
+ "logits/rejected": -1.4476011991500854,
1046
+ "logps/chosen": -208.40203857421875,
1047
+ "logps/rejected": -262.4669189453125,
1048
+ "loss": 0.1729,
1049
+ "rewards/accuracies": 0.9300000071525574,
1050
+ "rewards/chosen": -3.2039127349853516,
1051
+ "rewards/margins": 5.8355712890625,
1052
+ "rewards/rejected": -9.039484024047852,
1053
+ "step": 600
1054
+ },
1055
+ {
1056
+ "epoch": 0.6666666666666666,
1057
+ "eval_logits/chosen": -1.4379254579544067,
1058
+ "eval_logits/rejected": -1.4430339336395264,
1059
+ "eval_logps/chosen": -211.66957092285156,
1060
+ "eval_logps/rejected": -264.79345703125,
1061
+ "eval_loss": 0.22635750472545624,
1062
+ "eval_rewards/accuracies": 0.9199999570846558,
1063
+ "eval_rewards/chosen": -3.664064407348633,
1064
+ "eval_rewards/margins": 5.454564094543457,
1065
+ "eval_rewards/rejected": -9.118627548217773,
1066
+ "eval_runtime": 319.005,
1067
+ "eval_samples_per_second": 3.135,
1068
+ "eval_steps_per_second": 0.313,
1069
+ "step": 600
1070
+ },
1071
+ {
1072
+ "epoch": 0.6777777777777778,
1073
+ "grad_norm": 9.863251686096191,
1074
+ "learning_rate": 4.180019651388807e-06,
1075
+ "logits/chosen": -1.4420831203460693,
1076
+ "logits/rejected": -1.4478440284729004,
1077
+ "logps/chosen": -215.6461181640625,
1078
+ "logps/rejected": -264.3682861328125,
1079
+ "loss": 0.1723,
1080
+ "rewards/accuracies": 0.9100000858306885,
1081
+ "rewards/chosen": -3.9336395263671875,
1082
+ "rewards/margins": 5.252224922180176,
1083
+ "rewards/rejected": -9.185864448547363,
1084
+ "step": 610
1085
+ },
1086
+ {
1087
+ "epoch": 0.6888888888888889,
1088
+ "grad_norm": 26.010082244873047,
1089
+ "learning_rate": 4.14380342142266e-06,
1090
+ "logits/chosen": -1.4423331022262573,
1091
+ "logits/rejected": -1.4474163055419922,
1092
+ "logps/chosen": -207.67831420898438,
1093
+ "logps/rejected": -265.69677734375,
1094
+ "loss": 0.214,
1095
+ "rewards/accuracies": 0.9099999666213989,
1096
+ "rewards/chosen": -3.3267159461975098,
1097
+ "rewards/margins": 5.816192626953125,
1098
+ "rewards/rejected": -9.142909049987793,
1099
+ "step": 620
1100
+ },
1101
+ {
1102
+ "epoch": 0.7,
1103
+ "grad_norm": 23.913930892944336,
1104
+ "learning_rate": 4.106969024216348e-06,
1105
+ "logits/chosen": -1.43362557888031,
1106
+ "logits/rejected": -1.4401135444641113,
1107
+ "logps/chosen": -211.0988311767578,
1108
+ "logps/rejected": -265.10693359375,
1109
+ "loss": 0.4388,
1110
+ "rewards/accuracies": 0.8899999856948853,
1111
+ "rewards/chosen": -3.5427446365356445,
1112
+ "rewards/margins": 5.675654411315918,
1113
+ "rewards/rejected": -9.218399047851562,
1114
+ "step": 630
1115
+ },
1116
+ {
1117
+ "epoch": 0.7111111111111111,
1118
+ "grad_norm": 26.446819305419922,
1119
+ "learning_rate": 4.069530311680247e-06,
1120
+ "logits/chosen": -1.4354360103607178,
1121
+ "logits/rejected": -1.442990779876709,
1122
+ "logps/chosen": -204.5161590576172,
1123
+ "logps/rejected": -251.73101806640625,
1124
+ "loss": 0.2555,
1125
+ "rewards/accuracies": 0.9200000166893005,
1126
+ "rewards/chosen": -2.7997024059295654,
1127
+ "rewards/margins": 5.167999267578125,
1128
+ "rewards/rejected": -7.967701435089111,
1129
+ "step": 640
1130
+ },
1131
+ {
1132
+ "epoch": 0.7222222222222222,
1133
+ "grad_norm": 2.0295379161834717,
1134
+ "learning_rate": 4.031501362983007e-06,
1135
+ "logits/chosen": -1.4334403276443481,
1136
+ "logits/rejected": -1.4392154216766357,
1137
+ "logps/chosen": -205.815673828125,
1138
+ "logps/rejected": -249.6090087890625,
1139
+ "loss": 0.3747,
1140
+ "rewards/accuracies": 0.8800000548362732,
1141
+ "rewards/chosen": -3.0156917572021484,
1142
+ "rewards/margins": 4.648188591003418,
1143
+ "rewards/rejected": -7.663880348205566,
1144
+ "step": 650
1145
+ },
1146
+ {
1147
+ "epoch": 0.7333333333333333,
1148
+ "grad_norm": 22.82501792907715,
1149
+ "learning_rate": 3.992896479256966e-06,
1150
+ "logits/chosen": -1.4355220794677734,
1151
+ "logits/rejected": -1.4445066452026367,
1152
+ "logps/chosen": -205.87745666503906,
1153
+ "logps/rejected": -252.21890258789062,
1154
+ "loss": 0.2136,
1155
+ "rewards/accuracies": 0.9500000476837158,
1156
+ "rewards/chosen": -2.8590097427368164,
1157
+ "rewards/margins": 5.230529308319092,
1158
+ "rewards/rejected": -8.08953857421875,
1159
+ "step": 660
1160
+ },
1161
+ {
1162
+ "epoch": 0.7333333333333333,
1163
+ "eval_logits/chosen": -1.4456157684326172,
1164
+ "eval_logits/rejected": -1.451847791671753,
1165
+ "eval_logps/chosen": -206.54913330078125,
1166
+ "eval_logps/rejected": -253.787353515625,
1167
+ "eval_loss": 0.19935038685798645,
1168
+ "eval_rewards/accuracies": 0.918999969959259,
1169
+ "eval_rewards/chosen": -3.1520204544067383,
1170
+ "eval_rewards/margins": 4.865995407104492,
1171
+ "eval_rewards/rejected": -8.01801586151123,
1172
+ "eval_runtime": 319.1328,
1173
+ "eval_samples_per_second": 3.133,
1174
+ "eval_steps_per_second": 0.313,
1175
+ "step": 660
1176
+ },
1177
+ {
1178
+ "epoch": 0.7444444444444445,
1179
+ "grad_norm": 37.078155517578125,
1180
+ "learning_rate": 3.953730178220067e-06,
1181
+ "logits/chosen": -1.4451912641525269,
1182
+ "logits/rejected": -1.4504950046539307,
1183
+ "logps/chosen": -208.33489990234375,
1184
+ "logps/rejected": -255.33157348632812,
1185
+ "loss": 0.2289,
1186
+ "rewards/accuracies": 0.9199999570846558,
1187
+ "rewards/chosen": -3.3780035972595215,
1188
+ "rewards/margins": 4.752861976623535,
1189
+ "rewards/rejected": -8.130865097045898,
1190
+ "step": 670
1191
+ },
1192
+ {
1193
+ "epoch": 0.7555555555555555,
1194
+ "grad_norm": 14.792739868164062,
1195
+ "learning_rate": 3.914017188716347e-06,
1196
+ "logits/chosen": -1.446117877960205,
1197
+ "logits/rejected": -1.4537690877914429,
1198
+ "logps/chosen": -207.12896728515625,
1199
+ "logps/rejected": -261.03814697265625,
1200
+ "loss": 0.1755,
1201
+ "rewards/accuracies": 0.9399999976158142,
1202
+ "rewards/chosen": -3.137814998626709,
1203
+ "rewards/margins": 5.663388252258301,
1204
+ "rewards/rejected": -8.801202774047852,
1205
+ "step": 680
1206
+ },
1207
+ {
1208
+ "epoch": 0.7666666666666667,
1209
+ "grad_norm": 9.229610443115234,
1210
+ "learning_rate": 3.8737724451770155e-06,
1211
+ "logits/chosen": -1.4443621635437012,
1212
+ "logits/rejected": -1.4512722492218018,
1213
+ "logps/chosen": -215.41629028320312,
1214
+ "logps/rejected": -255.59149169921875,
1215
+ "loss": 0.2433,
1216
+ "rewards/accuracies": 0.8800000548362732,
1217
+ "rewards/chosen": -3.9103140830993652,
1218
+ "rewards/margins": 4.392501354217529,
1219
+ "rewards/rejected": -8.302814483642578,
1220
+ "step": 690
1221
+ },
1222
+ {
1223
+ "epoch": 0.7777777777777778,
1224
+ "grad_norm": 4.114097595214844,
1225
+ "learning_rate": 3.833011082004229e-06,
1226
+ "logits/chosen": -1.4504740238189697,
1227
+ "logits/rejected": -1.4539170265197754,
1228
+ "logps/chosen": -208.27923583984375,
1229
+ "logps/rejected": -259.309326171875,
1230
+ "loss": 0.1322,
1231
+ "rewards/accuracies": 0.940000057220459,
1232
+ "rewards/chosen": -3.5451531410217285,
1233
+ "rewards/margins": 4.7936835289001465,
1234
+ "rewards/rejected": -8.338837623596191,
1235
+ "step": 700
1236
+ },
1237
+ {
1238
+ "epoch": 0.7888888888888889,
1239
+ "grad_norm": 14.269043922424316,
1240
+ "learning_rate": 3.7917484278796578e-06,
1241
+ "logits/chosen": -1.4536712169647217,
1242
+ "logits/rejected": -1.4596309661865234,
1243
+ "logps/chosen": -212.81170654296875,
1244
+ "logps/rejected": -259.4583435058594,
1245
+ "loss": 0.2778,
1246
+ "rewards/accuracies": 0.9100000858306885,
1247
+ "rewards/chosen": -3.7558376789093018,
1248
+ "rewards/margins": 4.881363868713379,
1249
+ "rewards/rejected": -8.637201309204102,
1250
+ "step": 710
1251
+ },
1252
+ {
1253
+ "epoch": 0.8,
1254
+ "grad_norm": 2.647397756576538,
1255
+ "learning_rate": 3.7500000000000005e-06,
1256
+ "logits/chosen": -1.4511842727661133,
1257
+ "logits/rejected": -1.456930160522461,
1258
+ "logps/chosen": -208.67654418945312,
1259
+ "logps/rejected": -263.60205078125,
1260
+ "loss": 0.2148,
1261
+ "rewards/accuracies": 0.940000057220459,
1262
+ "rewards/chosen": -3.4297666549682617,
1263
+ "rewards/margins": 5.496917724609375,
1264
+ "rewards/rejected": -8.926685333251953,
1265
+ "step": 720
1266
+ },
1267
+ {
1268
+ "epoch": 0.8,
1269
+ "eval_logits/chosen": -1.4526758193969727,
1270
+ "eval_logits/rejected": -1.4588308334350586,
1271
+ "eval_logps/chosen": -208.24917602539062,
1272
+ "eval_logps/rejected": -259.9820251464844,
1273
+ "eval_loss": 0.26233014464378357,
1274
+ "eval_rewards/accuracies": 0.9039999842643738,
1275
+ "eval_rewards/chosen": -3.3220245838165283,
1276
+ "eval_rewards/margins": 5.315458297729492,
1277
+ "eval_rewards/rejected": -8.637483596801758,
1278
+ "eval_runtime": 319.0745,
1279
+ "eval_samples_per_second": 3.134,
1280
+ "eval_steps_per_second": 0.313,
1281
+ "step": 720
1282
+ },
1283
+ {
1284
+ "epoch": 0.8111111111111111,
1285
+ "grad_norm": 27.4842472076416,
1286
+ "learning_rate": 3.7077814982415966e-06,
1287
+ "logits/chosen": -1.4542248249053955,
1288
+ "logits/rejected": -1.4581375122070312,
1289
+ "logps/chosen": -201.25257873535156,
1290
+ "logps/rejected": -267.01409912109375,
1291
+ "loss": 0.1524,
1292
+ "rewards/accuracies": 0.9300000071525574,
1293
+ "rewards/chosen": -2.901744842529297,
1294
+ "rewards/margins": 6.153472900390625,
1295
+ "rewards/rejected": -9.055217742919922,
1296
+ "step": 730
1297
+ },
1298
+ {
1299
+ "epoch": 0.8222222222222222,
1300
+ "grad_norm": 17.44131851196289,
1301
+ "learning_rate": 3.665108799256348e-06,
1302
+ "logits/chosen": -1.4501639604568481,
1303
+ "logits/rejected": -1.4550120830535889,
1304
+ "logps/chosen": -215.76513671875,
1305
+ "logps/rejected": -265.45428466796875,
1306
+ "loss": 0.1982,
1307
+ "rewards/accuracies": 0.9200000166893005,
1308
+ "rewards/chosen": -4.081113815307617,
1309
+ "rewards/margins": 5.071871757507324,
1310
+ "rewards/rejected": -9.152984619140625,
1311
+ "step": 740
1312
+ },
1313
+ {
1314
+ "epoch": 0.8333333333333334,
1315
+ "grad_norm": 58.25971221923828,
1316
+ "learning_rate": 3.621997950501156e-06,
1317
+ "logits/chosen": -1.4513449668884277,
1318
+ "logits/rejected": -1.4563398361206055,
1319
+ "logps/chosen": -208.85487365722656,
1320
+ "logps/rejected": -267.5930480957031,
1321
+ "loss": 0.2564,
1322
+ "rewards/accuracies": 0.89000004529953,
1323
+ "rewards/chosen": -3.607893466949463,
1324
+ "rewards/margins": 5.560456275939941,
1325
+ "rewards/rejected": -9.168350219726562,
1326
+ "step": 750
1327
+ },
1328
+ {
1329
+ "epoch": 0.8444444444444444,
1330
+ "grad_norm": 30.51304054260254,
1331
+ "learning_rate": 3.578465164203134e-06,
1332
+ "logits/chosen": -1.454546332359314,
1333
+ "logits/rejected": -1.457871913909912,
1334
+ "logps/chosen": -204.0816650390625,
1335
+ "logps/rejected": -271.85711669921875,
1336
+ "loss": 0.169,
1337
+ "rewards/accuracies": 0.9500000476837158,
1338
+ "rewards/chosen": -3.2631070613861084,
1339
+ "rewards/margins": 6.1964874267578125,
1340
+ "rewards/rejected": -9.4595947265625,
1341
+ "step": 760
1342
+ },
1343
+ {
1344
+ "epoch": 0.8555555555555555,
1345
+ "grad_norm": 28.097698211669922,
1346
+ "learning_rate": 3.5345268112628485e-06,
1347
+ "logits/chosen": -1.4505870342254639,
1348
+ "logits/rejected": -1.457573652267456,
1349
+ "logps/chosen": -215.683349609375,
1350
+ "logps/rejected": -270.27252197265625,
1351
+ "loss": 0.2219,
1352
+ "rewards/accuracies": 0.9300000071525574,
1353
+ "rewards/chosen": -4.015974998474121,
1354
+ "rewards/margins": 5.678750038146973,
1355
+ "rewards/rejected": -9.694725036621094,
1356
+ "step": 770
1357
+ },
1358
+ {
1359
+ "epoch": 0.8666666666666667,
1360
+ "grad_norm": 36.97835159301758,
1361
+ "learning_rate": 3.4901994150978926e-06,
1362
+ "logits/chosen": -1.4549884796142578,
1363
+ "logits/rejected": -1.4569082260131836,
1364
+ "logps/chosen": -204.8563995361328,
1365
+ "logps/rejected": -270.4274597167969,
1366
+ "loss": 0.151,
1367
+ "rewards/accuracies": 0.9600000381469727,
1368
+ "rewards/chosen": -3.443523406982422,
1369
+ "rewards/margins": 5.77408504486084,
1370
+ "rewards/rejected": -9.217609405517578,
1371
+ "step": 780
1372
+ },
1373
+ {
1374
+ "epoch": 0.8666666666666667,
1375
+ "eval_logits/chosen": -1.455579400062561,
1376
+ "eval_logits/rejected": -1.462104320526123,
1377
+ "eval_logps/chosen": -212.8717041015625,
1378
+ "eval_logps/rejected": -266.91241455078125,
1379
+ "eval_loss": 0.26282998919487,
1380
+ "eval_rewards/accuracies": 0.8830000162124634,
1381
+ "eval_rewards/chosen": -3.78427791595459,
1382
+ "eval_rewards/margins": 5.546243190765381,
1383
+ "eval_rewards/rejected": -9.330520629882812,
1384
+ "eval_runtime": 319.1792,
1385
+ "eval_samples_per_second": 3.133,
1386
+ "eval_steps_per_second": 0.313,
1387
+ "step": 780
1388
+ },
1389
+ {
1390
+ "epoch": 0.8777777777777778,
1391
+ "grad_norm": 4.409013748168945,
1392
+ "learning_rate": 3.4454996454291066e-06,
1393
+ "logits/chosen": -1.454880952835083,
1394
+ "logits/rejected": -1.4608569145202637,
1395
+ "logps/chosen": -213.51556396484375,
1396
+ "logps/rejected": -270.1238708496094,
1397
+ "loss": 0.2572,
1398
+ "rewards/accuracies": 0.8999999761581421,
1399
+ "rewards/chosen": -3.862175464630127,
1400
+ "rewards/margins": 5.772583961486816,
1401
+ "rewards/rejected": -9.634759902954102,
1402
+ "step": 790
1403
+ },
1404
+ {
1405
+ "epoch": 0.8888888888888888,
1406
+ "grad_norm": 29.155506134033203,
1407
+ "learning_rate": 3.400444312011776e-06,
1408
+ "logits/chosen": -1.4549602270126343,
1409
+ "logits/rejected": -1.4602875709533691,
1410
+ "logps/chosen": -212.6188201904297,
1411
+ "logps/rejected": -274.49560546875,
1412
+ "loss": 0.1285,
1413
+ "rewards/accuracies": 0.9600000381469727,
1414
+ "rewards/chosen": -3.8824949264526367,
1415
+ "rewards/margins": 6.083772659301758,
1416
+ "rewards/rejected": -9.966266632080078,
1417
+ "step": 800
1418
+ },
1419
+ {
1420
+ "epoch": 0.9,
1421
+ "grad_norm": 28.179977416992188,
1422
+ "learning_rate": 3.3550503583141726e-06,
1423
+ "logits/chosen": -1.4578851461410522,
1424
+ "logits/rejected": -1.4644014835357666,
1425
+ "logps/chosen": -214.60816955566406,
1426
+ "logps/rejected": -270.767822265625,
1427
+ "loss": 0.3057,
1428
+ "rewards/accuracies": 0.8899999856948853,
1429
+ "rewards/chosen": -3.949023723602295,
1430
+ "rewards/margins": 5.775270462036133,
1431
+ "rewards/rejected": -9.724294662475586,
1432
+ "step": 810
1433
+ },
1434
+ {
1435
+ "epoch": 0.9111111111111111,
1436
+ "grad_norm": 22.016096115112305,
1437
+ "learning_rate": 3.3093348551458033e-06,
1438
+ "logits/chosen": -1.4591329097747803,
1439
+ "logits/rejected": -1.464478850364685,
1440
+ "logps/chosen": -206.40281677246094,
1441
+ "logps/rejected": -272.22930908203125,
1442
+ "loss": 0.1286,
1443
+ "rewards/accuracies": 0.9700000286102295,
1444
+ "rewards/chosen": -3.3459863662719727,
1445
+ "rewards/margins": 6.317253112792969,
1446
+ "rewards/rejected": -9.663239479064941,
1447
+ "step": 820
1448
+ },
1449
+ {
1450
+ "epoch": 0.9222222222222223,
1451
+ "grad_norm": 24.308671951293945,
1452
+ "learning_rate": 3.2633149942377835e-06,
1453
+ "logits/chosen": -1.4574294090270996,
1454
+ "logits/rejected": -1.4642754793167114,
1455
+ "logps/chosen": -213.82862854003906,
1456
+ "logps/rejected": -266.60675048828125,
1457
+ "loss": 0.2728,
1458
+ "rewards/accuracies": 0.9000000357627869,
1459
+ "rewards/chosen": -3.8955249786376953,
1460
+ "rewards/margins": 5.386727809906006,
1461
+ "rewards/rejected": -9.282252311706543,
1462
+ "step": 830
1463
+ },
1464
+ {
1465
+ "epoch": 0.9333333333333333,
1466
+ "grad_norm": 18.76812171936035,
1467
+ "learning_rate": 3.217008081777726e-06,
1468
+ "logits/chosen": -1.4542195796966553,
1469
+ "logits/rejected": -1.461412787437439,
1470
+ "logps/chosen": -212.99435424804688,
1471
+ "logps/rejected": -267.50958251953125,
1472
+ "loss": 0.1759,
1473
+ "rewards/accuracies": 0.940000057220459,
1474
+ "rewards/chosen": -3.8036112785339355,
1475
+ "rewards/margins": 5.557330131530762,
1476
+ "rewards/rejected": -9.360941886901855,
1477
+ "step": 840
1478
+ },
1479
+ {
1480
+ "epoch": 0.9333333333333333,
1481
+ "eval_logits/chosen": -1.4564862251281738,
1482
+ "eval_logits/rejected": -1.463136911392212,
1483
+ "eval_logps/chosen": -212.54718017578125,
1484
+ "eval_logps/rejected": -267.1683349609375,
1485
+ "eval_loss": 0.17360562086105347,
1486
+ "eval_rewards/accuracies": 0.9269999861717224,
1487
+ "eval_rewards/chosen": -3.751824378967285,
1488
+ "eval_rewards/margins": 5.604288101196289,
1489
+ "eval_rewards/rejected": -9.35611343383789,
1490
+ "eval_runtime": 319.0169,
1491
+ "eval_samples_per_second": 3.135,
1492
+ "eval_steps_per_second": 0.313,
1493
+ "step": 840
1494
+ },
1495
+ {
1496
+ "epoch": 0.9444444444444444,
1497
+ "grad_norm": 7.19240665435791,
1498
+ "learning_rate": 3.1704315319015936e-06,
1499
+ "logits/chosen": -1.4580819606781006,
1500
+ "logits/rejected": -1.46415114402771,
1501
+ "logps/chosen": -211.7685546875,
1502
+ "logps/rejected": -267.0213623046875,
1503
+ "loss": 0.2128,
1504
+ "rewards/accuracies": 0.9100000262260437,
1505
+ "rewards/chosen": -3.7857413291931152,
1506
+ "rewards/margins": 5.433224678039551,
1507
+ "rewards/rejected": -9.218965530395508,
1508
+ "step": 850
1509
+ },
1510
+ {
1511
+ "epoch": 0.9555555555555556,
1512
+ "grad_norm": 36.987693786621094,
1513
+ "learning_rate": 3.1236028601449534e-06,
1514
+ "logits/chosen": -1.457148551940918,
1515
+ "logits/rejected": -1.4629095792770386,
1516
+ "logps/chosen": -213.85028076171875,
1517
+ "logps/rejected": -263.3716735839844,
1518
+ "loss": 0.2345,
1519
+ "rewards/accuracies": 0.8800000548362732,
1520
+ "rewards/chosen": -3.9159281253814697,
1521
+ "rewards/margins": 5.010843276977539,
1522
+ "rewards/rejected": -8.92677116394043,
1523
+ "step": 860
1524
+ },
1525
+ {
1526
+ "epoch": 0.9666666666666667,
1527
+ "grad_norm": 3.213857889175415,
1528
+ "learning_rate": 3.0765396768561005e-06,
1529
+ "logits/chosen": -1.4600489139556885,
1530
+ "logits/rejected": -1.4643452167510986,
1531
+ "logps/chosen": -207.65179443359375,
1532
+ "logps/rejected": -265.60382080078125,
1533
+ "loss": 0.1257,
1534
+ "rewards/accuracies": 0.940000057220459,
1535
+ "rewards/chosen": -3.5244479179382324,
1536
+ "rewards/margins": 5.42505407333374,
1537
+ "rewards/rejected": -8.949502944946289,
1538
+ "step": 870
1539
+ },
1540
+ {
1541
+ "epoch": 0.9777777777777777,
1542
+ "grad_norm": 2.7685673236846924,
1543
+ "learning_rate": 3.0292596805735275e-06,
1544
+ "logits/chosen": -1.4531805515289307,
1545
+ "logits/rejected": -1.4613621234893799,
1546
+ "logps/chosen": -207.08041381835938,
1547
+ "logps/rejected": -272.2119140625,
1548
+ "loss": 0.0729,
1549
+ "rewards/accuracies": 0.9600000381469727,
1550
+ "rewards/chosen": -3.164515495300293,
1551
+ "rewards/margins": 6.724908351898193,
1552
+ "rewards/rejected": -9.889423370361328,
1553
+ "step": 880
1554
+ },
1555
+ {
1556
+ "epoch": 0.9888888888888889,
1557
+ "grad_norm": 32.784828186035156,
1558
+ "learning_rate": 2.9817806513702247e-06,
1559
+ "logits/chosen": -1.4549615383148193,
1560
+ "logits/rejected": -1.4622005224227905,
1561
+ "logps/chosen": -208.28564453125,
1562
+ "logps/rejected": -271.87994384765625,
1563
+ "loss": 0.261,
1564
+ "rewards/accuracies": 0.9000000357627869,
1565
+ "rewards/chosen": -3.400259494781494,
1566
+ "rewards/margins": 6.355001449584961,
1567
+ "rewards/rejected": -9.755260467529297,
1568
+ "step": 890
1569
+ },
1570
+ {
1571
+ "epoch": 1.0,
1572
+ "grad_norm": 19.346893310546875,
1573
+ "learning_rate": 2.9341204441673267e-06,
1574
+ "logits/chosen": -1.4544117450714111,
1575
+ "logits/rejected": -1.4625937938690186,
1576
+ "logps/chosen": -213.2257537841797,
1577
+ "logps/rejected": -273.80535888671875,
1578
+ "loss": 0.1455,
1579
+ "rewards/accuracies": 0.9500000476837158,
1580
+ "rewards/chosen": -3.6717934608459473,
1581
+ "rewards/margins": 6.4866108894348145,
1582
+ "rewards/rejected": -10.158405303955078,
1583
+ "step": 900
1584
+ },
1585
+ {
1586
+ "epoch": 1.0,
1587
+ "eval_logits/chosen": -1.4550888538360596,
1588
+ "eval_logits/rejected": -1.4625444412231445,
1589
+ "eval_logps/chosen": -209.57638549804688,
1590
+ "eval_logps/rejected": -274.5335388183594,
1591
+ "eval_loss": 0.19673706591129303,
1592
+ "eval_rewards/accuracies": 0.9290000200271606,
1593
+ "eval_rewards/chosen": -3.454745292663574,
1594
+ "eval_rewards/margins": 6.637889862060547,
1595
+ "eval_rewards/rejected": -10.092636108398438,
1596
+ "eval_runtime": 319.1955,
1597
+ "eval_samples_per_second": 3.133,
1598
+ "eval_steps_per_second": 0.313,
1599
+ "step": 900
1600
+ },
1601
+ {
1602
+ "epoch": 1.011111111111111,
1603
+ "grad_norm": 5.159682273864746,
1604
+ "learning_rate": 2.8862969820196017e-06,
1605
+ "logits/chosen": -1.453148603439331,
1606
+ "logits/rejected": -1.460700511932373,
1607
+ "logps/chosen": -207.94732666015625,
1608
+ "logps/rejected": -275.78265380859375,
1609
+ "loss": 0.1197,
1610
+ "rewards/accuracies": 0.9500000476837158,
1611
+ "rewards/chosen": -3.277247428894043,
1612
+ "rewards/margins": 6.945833683013916,
1613
+ "rewards/rejected": -10.223081588745117,
1614
+ "step": 910
1615
+ },
1616
+ {
1617
+ "epoch": 1.0222222222222221,
1618
+ "grad_norm": 39.852725982666016,
1619
+ "learning_rate": 2.8383282493753282e-06,
1620
+ "logits/chosen": -1.4552119970321655,
1621
+ "logits/rejected": -1.4620335102081299,
1622
+ "logps/chosen": -205.69607543945312,
1623
+ "logps/rejected": -279.0772705078125,
1624
+ "loss": 0.1646,
1625
+ "rewards/accuracies": 0.9500000476837158,
1626
+ "rewards/chosen": -3.194272041320801,
1627
+ "rewards/margins": 7.236158847808838,
1628
+ "rewards/rejected": -10.430431365966797,
1629
+ "step": 920
1630
+ },
1631
+ {
1632
+ "epoch": 1.0333333333333334,
1633
+ "grad_norm": 0.4127664268016815,
1634
+ "learning_rate": 2.7902322853130758e-06,
1635
+ "logits/chosen": -1.4518330097198486,
1636
+ "logits/rejected": -1.4583864212036133,
1637
+ "logps/chosen": -208.1166229248047,
1638
+ "logps/rejected": -273.89801025390625,
1639
+ "loss": 0.1935,
1640
+ "rewards/accuracies": 0.9300000071525574,
1641
+ "rewards/chosen": -3.375034809112549,
1642
+ "rewards/margins": 6.5843119621276855,
1643
+ "rewards/rejected": -9.959346771240234,
1644
+ "step": 930
1645
+ },
1646
+ {
1647
+ "epoch": 1.0444444444444445,
1648
+ "grad_norm": 55.90793991088867,
1649
+ "learning_rate": 2.742027176757948e-06,
1650
+ "logits/chosen": -1.4538707733154297,
1651
+ "logits/rejected": -1.4589080810546875,
1652
+ "logps/chosen": -207.4318389892578,
1653
+ "logps/rejected": -275.7708740234375,
1654
+ "loss": 0.2136,
1655
+ "rewards/accuracies": 0.9100000262260437,
1656
+ "rewards/chosen": -3.4339537620544434,
1657
+ "rewards/margins": 6.580141544342041,
1658
+ "rewards/rejected": -10.014095306396484,
1659
+ "step": 940
1660
+ },
1661
+ {
1662
+ "epoch": 1.0555555555555556,
1663
+ "grad_norm": 27.653209686279297,
1664
+ "learning_rate": 2.6937310516798276e-06,
1665
+ "logits/chosen": -1.4511687755584717,
1666
+ "logits/rejected": -1.4569811820983887,
1667
+ "logps/chosen": -213.1746368408203,
1668
+ "logps/rejected": -274.05364990234375,
1669
+ "loss": 0.3442,
1670
+ "rewards/accuracies": 0.8800000548362732,
1671
+ "rewards/chosen": -3.9909844398498535,
1672
+ "rewards/margins": 5.839582443237305,
1673
+ "rewards/rejected": -9.83056640625,
1674
+ "step": 950
1675
+ },
1676
+ {
1677
+ "epoch": 1.0666666666666667,
1678
+ "grad_norm": 17.936847686767578,
1679
+ "learning_rate": 2.6453620722761897e-06,
1680
+ "logits/chosen": -1.4525644779205322,
1681
+ "logits/rejected": -1.4593393802642822,
1682
+ "logps/chosen": -210.91744995117188,
1683
+ "logps/rejected": -276.6822814941406,
1684
+ "loss": 0.1456,
1685
+ "rewards/accuracies": 0.9500000476837158,
1686
+ "rewards/chosen": -3.682964324951172,
1687
+ "rewards/margins": 6.5234174728393555,
1688
+ "rewards/rejected": -10.206380844116211,
1689
+ "step": 960
1690
+ },
1691
+ {
1692
+ "epoch": 1.0666666666666667,
1693
+ "eval_logits/chosen": -1.4538413286209106,
1694
+ "eval_logits/rejected": -1.461044430732727,
1695
+ "eval_logps/chosen": -214.53591918945312,
1696
+ "eval_logps/rejected": -277.791259765625,
1697
+ "eval_loss": 0.2036525309085846,
1698
+ "eval_rewards/accuracies": 0.9289999604225159,
1699
+ "eval_rewards/chosen": -3.950699806213379,
1700
+ "eval_rewards/margins": 6.467706680297852,
1701
+ "eval_rewards/rejected": -10.418405532836914,
1702
+ "eval_runtime": 319.0271,
1703
+ "eval_samples_per_second": 3.135,
1704
+ "eval_steps_per_second": 0.313,
1705
+ "step": 960
1706
+ },
1707
+ {
1708
+ "epoch": 1.0777777777777777,
1709
+ "grad_norm": 53.1196403503418,
1710
+ "learning_rate": 2.5969384281420425e-06,
1711
+ "logits/chosen": -1.452633023262024,
1712
+ "logits/rejected": -1.4589219093322754,
1713
+ "logps/chosen": -213.8082275390625,
1714
+ "logps/rejected": -272.00054931640625,
1715
+ "loss": 0.2095,
1716
+ "rewards/accuracies": 0.9199999570846558,
1717
+ "rewards/chosen": -3.940258264541626,
1718
+ "rewards/margins": 5.825028896331787,
1719
+ "rewards/rejected": -9.765287399291992,
1720
+ "step": 970
1721
+ },
1722
+ {
1723
+ "epoch": 1.0888888888888888,
1724
+ "grad_norm": 9.145478248596191,
1725
+ "learning_rate": 2.548478329429561e-06,
1726
+ "logits/chosen": -1.4536033868789673,
1727
+ "logits/rejected": -1.4611570835113525,
1728
+ "logps/chosen": -206.41012573242188,
1729
+ "logps/rejected": -274.23272705078125,
1730
+ "loss": 0.2283,
1731
+ "rewards/accuracies": 0.9200000166893005,
1732
+ "rewards/chosen": -3.157097578048706,
1733
+ "rewards/margins": 6.88623046875,
1734
+ "rewards/rejected": -10.043328285217285,
1735
+ "step": 980
1736
+ },
1737
+ {
1738
+ "epoch": 1.1,
1739
+ "grad_norm": 26.69437026977539,
1740
+ "learning_rate": 2.5e-06,
1741
+ "logits/chosen": -1.452072024345398,
1742
+ "logits/rejected": -1.460184097290039,
1743
+ "logps/chosen": -215.56329345703125,
1744
+ "logps/rejected": -278.34051513671875,
1745
+ "loss": 0.2056,
1746
+ "rewards/accuracies": 0.9099999666213989,
1747
+ "rewards/chosen": -4.010292053222656,
1748
+ "rewards/margins": 6.502901077270508,
1749
+ "rewards/rejected": -10.513193130493164,
1750
+ "step": 990
1751
+ },
1752
+ {
1753
+ "epoch": 1.1111111111111112,
1754
+ "grad_norm": 26.09144401550293,
1755
+ "learning_rate": 2.4515216705704396e-06,
1756
+ "logits/chosen": -1.4517230987548828,
1757
+ "logits/rejected": -1.4599707126617432,
1758
+ "logps/chosen": -214.8649444580078,
1759
+ "logps/rejected": -274.9700927734375,
1760
+ "loss": 0.2523,
1761
+ "rewards/accuracies": 0.8999999761581421,
1762
+ "rewards/chosen": -3.9132699966430664,
1763
+ "rewards/margins": 6.279613494873047,
1764
+ "rewards/rejected": -10.192882537841797,
1765
+ "step": 1000
1766
+ }
1767
+ ],
1768
+ "logging_steps": 10,
1769
+ "max_steps": 1800,
1770
+ "num_input_tokens_seen": 0,
1771
+ "num_train_epochs": 2,
1772
+ "save_steps": 500,
1773
+ "stateful_callbacks": {
1774
+ "TrainerControl": {
1775
+ "args": {
1776
+ "should_epoch_stop": false,
1777
+ "should_evaluate": false,
1778
+ "should_log": false,
1779
+ "should_save": true,
1780
+ "should_training_stop": false
1781
+ },
1782
+ "attributes": {}
1783
+ }
1784
+ },
1785
+ "total_flos": 1.5931620691279872e+18,
1786
+ "train_batch_size": 5,
1787
+ "trial_name": null,
1788
+ "trial_params": null
1789
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d11f32108ca68e7f7be84e704fa987417996a33cca0180d79a224d4ab67c5e2
3
+ size 5432
checkpoint-1500/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** mistralai/Mistral-Nemo-Instruct-2407
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
checkpoint-1500/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "up_proj",
24
+ "gate_proj",
25
+ "o_proj",
26
+ "k_proj",
27
+ "q_proj",
28
+ "down_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
checkpoint-1500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2391ebeb1ae75d8f4d4eb05fd7a9c124bf7af19ad4dd0f13e9c89b6236b94392
3
+ size 114106856
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213f006311129aaf00c728d9fc28e2ced965a77f56482b1697099925ad1f423d
3
+ size 228536930
checkpoint-1500/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:913d0afcd3ae412b5721949e0bb0bd909a53a603cabbfb507320fefe794f592d
3
+ size 14512
checkpoint-1500/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56941205cfc72d1850aaad2f1758c8bef104008c04e7a7df0f24c0fbf1c5a583
3
+ size 14512
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd0625e7925059ea11ce72ce5493d572a56f3aed17c9e4c55bb28b0c6d1eb72d
3
+ size 1064
checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-1500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
3
+ size 17078292
checkpoint-1500/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,2683 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.6666666666666665,
5
+ "eval_steps": 60,
6
+ "global_step": 1500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.011111111111111112,
13
+ "grad_norm": 2.362602949142456,
14
+ "learning_rate": 2.7777777777777776e-07,
15
+ "logits/chosen": -1.400684118270874,
16
+ "logits/rejected": -1.4005341529846191,
17
+ "logps/chosen": -174.8197021484375,
18
+ "logps/rejected": -174.18280029296875,
19
+ "loss": 0.6981,
20
+ "rewards/accuracies": 0.3700000047683716,
21
+ "rewards/chosen": -0.017464280128479004,
22
+ "rewards/margins": -0.00935516320168972,
23
+ "rewards/rejected": -0.00810911599546671,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.022222222222222223,
28
+ "grad_norm": 2.706902027130127,
29
+ "learning_rate": 5.555555555555555e-07,
30
+ "logits/chosen": -1.401512622833252,
31
+ "logits/rejected": -1.4014896154403687,
32
+ "logps/chosen": -172.8441162109375,
33
+ "logps/rejected": -176.39537048339844,
34
+ "loss": 0.6945,
35
+ "rewards/accuracies": 0.4599999785423279,
36
+ "rewards/chosen": -0.015734069049358368,
37
+ "rewards/margins": -0.0022257084492594004,
38
+ "rewards/rejected": -0.01350836269557476,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.03333333333333333,
43
+ "grad_norm": 2.120821714401245,
44
+ "learning_rate": 8.333333333333333e-07,
45
+ "logits/chosen": -1.3998275995254517,
46
+ "logits/rejected": -1.3999087810516357,
47
+ "logps/chosen": -173.80712890625,
48
+ "logps/rejected": -175.36126708984375,
49
+ "loss": 0.6927,
50
+ "rewards/accuracies": 0.5099999904632568,
51
+ "rewards/chosen": -0.00933685339987278,
52
+ "rewards/margins": 0.0013576654018834233,
53
+ "rewards/rejected": -0.010694518685340881,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.044444444444444446,
58
+ "grad_norm": 0.6226487159729004,
59
+ "learning_rate": 1.111111111111111e-06,
60
+ "logits/chosen": -1.4011458158493042,
61
+ "logits/rejected": -1.4012081623077393,
62
+ "logps/chosen": -173.29324340820312,
63
+ "logps/rejected": -175.90345764160156,
64
+ "loss": 0.6926,
65
+ "rewards/accuracies": 0.5099999904632568,
66
+ "rewards/chosen": -0.02281300537288189,
67
+ "rewards/margins": 0.0015505983028560877,
68
+ "rewards/rejected": -0.024363603442907333,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.05555555555555555,
73
+ "grad_norm": 2.68591046333313,
74
+ "learning_rate": 1.3888888888888892e-06,
75
+ "logits/chosen": -1.4008080959320068,
76
+ "logits/rejected": -1.4006825685501099,
77
+ "logps/chosen": -175.80612182617188,
78
+ "logps/rejected": -173.04119873046875,
79
+ "loss": 0.6942,
80
+ "rewards/accuracies": 0.5000000596046448,
81
+ "rewards/chosen": -0.014659256674349308,
82
+ "rewards/margins": -0.0015078135766088963,
83
+ "rewards/rejected": -0.013151444494724274,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.06666666666666667,
88
+ "grad_norm": 0.6941749453544617,
89
+ "learning_rate": 1.6666666666666667e-06,
90
+ "logits/chosen": -1.4003050327301025,
91
+ "logits/rejected": -1.4006407260894775,
92
+ "logps/chosen": -174.0802001953125,
93
+ "logps/rejected": -175.01547241210938,
94
+ "loss": 0.6939,
95
+ "rewards/accuracies": 0.5100000500679016,
96
+ "rewards/chosen": -0.026361756026744843,
97
+ "rewards/margins": -0.0008379966020584106,
98
+ "rewards/rejected": -0.025523759424686432,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.06666666666666667,
103
+ "eval_logits/chosen": -1.4009861946105957,
104
+ "eval_logits/rejected": -1.4008183479309082,
105
+ "eval_logps/chosen": -175.24819946289062,
106
+ "eval_logps/rejected": -173.85289001464844,
107
+ "eval_loss": 0.6920965313911438,
108
+ "eval_rewards/accuracies": 0.5189999938011169,
109
+ "eval_rewards/chosen": -0.021925970911979675,
110
+ "eval_rewards/margins": 0.0026434571482241154,
111
+ "eval_rewards/rejected": -0.024569429457187653,
112
+ "eval_runtime": 318.9511,
113
+ "eval_samples_per_second": 3.135,
114
+ "eval_steps_per_second": 0.314,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.07777777777777778,
119
+ "grad_norm": 1.3399503231048584,
120
+ "learning_rate": 1.944444444444445e-06,
121
+ "logits/chosen": -1.4007337093353271,
122
+ "logits/rejected": -1.4006619453430176,
123
+ "logps/chosen": -173.1317138671875,
124
+ "logps/rejected": -175.83157348632812,
125
+ "loss": 0.6926,
126
+ "rewards/accuracies": 0.5200000405311584,
127
+ "rewards/chosen": -0.02405247837305069,
128
+ "rewards/margins": 0.001808380475267768,
129
+ "rewards/rejected": -0.025860857218503952,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.08888888888888889,
134
+ "grad_norm": 4.030770778656006,
135
+ "learning_rate": 2.222222222222222e-06,
136
+ "logits/chosen": -1.400660753250122,
137
+ "logits/rejected": -1.4007993936538696,
138
+ "logps/chosen": -172.63229370117188,
139
+ "logps/rejected": -176.5906524658203,
140
+ "loss": 0.6849,
141
+ "rewards/accuracies": 0.5900000333786011,
142
+ "rewards/chosen": -0.013674546033143997,
143
+ "rewards/margins": 0.017781419679522514,
144
+ "rewards/rejected": -0.03145596385002136,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 0.1,
149
+ "grad_norm": 7.06594181060791,
150
+ "learning_rate": 2.5e-06,
151
+ "logits/chosen": -1.4002556800842285,
152
+ "logits/rejected": -1.400156021118164,
153
+ "logps/chosen": -176.54403686523438,
154
+ "logps/rejected": -172.20162963867188,
155
+ "loss": 0.6946,
156
+ "rewards/accuracies": 0.44999998807907104,
157
+ "rewards/chosen": -0.01780758798122406,
158
+ "rewards/margins": -0.0011917415540665388,
159
+ "rewards/rejected": -0.016615845263004303,
160
+ "step": 90
161
+ },
162
+ {
163
+ "epoch": 0.1111111111111111,
164
+ "grad_norm": 4.663311004638672,
165
+ "learning_rate": 2.7777777777777783e-06,
166
+ "logits/chosen": -1.40169358253479,
167
+ "logits/rejected": -1.4018887281417847,
168
+ "logps/chosen": -174.86729431152344,
169
+ "logps/rejected": -174.28994750976562,
170
+ "loss": 0.6925,
171
+ "rewards/accuracies": 0.5099999904632568,
172
+ "rewards/chosen": -0.020927399396896362,
173
+ "rewards/margins": 0.002672073431313038,
174
+ "rewards/rejected": -0.023599475622177124,
175
+ "step": 100
176
+ },
177
+ {
178
+ "epoch": 0.12222222222222222,
179
+ "grad_norm": 2.7771716117858887,
180
+ "learning_rate": 3.055555555555556e-06,
181
+ "logits/chosen": -1.4018511772155762,
182
+ "logits/rejected": -1.401686668395996,
183
+ "logps/chosen": -175.4040069580078,
184
+ "logps/rejected": -173.77352905273438,
185
+ "loss": 0.6917,
186
+ "rewards/accuracies": 0.5,
187
+ "rewards/chosen": -0.02241549640893936,
188
+ "rewards/margins": 0.004419571254402399,
189
+ "rewards/rejected": -0.026835069060325623,
190
+ "step": 110
191
+ },
192
+ {
193
+ "epoch": 0.13333333333333333,
194
+ "grad_norm": 2.225400686264038,
195
+ "learning_rate": 3.3333333333333333e-06,
196
+ "logits/chosen": -1.4029500484466553,
197
+ "logits/rejected": -1.4027996063232422,
198
+ "logps/chosen": -175.29742431640625,
199
+ "logps/rejected": -174.22561645507812,
200
+ "loss": 0.6871,
201
+ "rewards/accuracies": 0.5600000023841858,
202
+ "rewards/chosen": -0.03488890081644058,
203
+ "rewards/margins": 0.013112092390656471,
204
+ "rewards/rejected": -0.0480009950697422,
205
+ "step": 120
206
+ },
207
+ {
208
+ "epoch": 0.13333333333333333,
209
+ "eval_logits/chosen": -1.403046727180481,
210
+ "eval_logits/rejected": -1.4029061794281006,
211
+ "eval_logps/chosen": -175.306884765625,
212
+ "eval_logps/rejected": -174.10104370117188,
213
+ "eval_loss": 0.6829859018325806,
214
+ "eval_rewards/accuracies": 0.6079999804496765,
215
+ "eval_rewards/chosen": -0.027797138318419456,
216
+ "eval_rewards/margins": 0.021586475893855095,
217
+ "eval_rewards/rejected": -0.04938361421227455,
218
+ "eval_runtime": 319.5591,
219
+ "eval_samples_per_second": 3.129,
220
+ "eval_steps_per_second": 0.313,
221
+ "step": 120
222
+ },
223
+ {
224
+ "epoch": 0.14444444444444443,
225
+ "grad_norm": 4.428592205047607,
226
+ "learning_rate": 3.6111111111111115e-06,
227
+ "logits/chosen": -1.4035028219223022,
228
+ "logits/rejected": -1.403373122215271,
229
+ "logps/chosen": -175.11550903320312,
230
+ "logps/rejected": -174.84075927734375,
231
+ "loss": 0.6805,
232
+ "rewards/accuracies": 0.6200000643730164,
233
+ "rewards/chosen": -0.05135633796453476,
234
+ "rewards/margins": 0.027625277638435364,
235
+ "rewards/rejected": -0.07898162305355072,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 0.15555555555555556,
240
+ "grad_norm": 1.5452574491500854,
241
+ "learning_rate": 3.88888888888889e-06,
242
+ "logits/chosen": -1.4023932218551636,
243
+ "logits/rejected": -1.402073621749878,
244
+ "logps/chosen": -174.4642791748047,
245
+ "logps/rejected": -176.83168029785156,
246
+ "loss": 0.6804,
247
+ "rewards/accuracies": 0.6299999952316284,
248
+ "rewards/chosen": -0.11876146495342255,
249
+ "rewards/margins": 0.02933622896671295,
250
+ "rewards/rejected": -0.1480976939201355,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.16666666666666666,
255
+ "grad_norm": 0.9253703951835632,
256
+ "learning_rate": 4.166666666666667e-06,
257
+ "logits/chosen": -1.4006946086883545,
258
+ "logits/rejected": -1.400911808013916,
259
+ "logps/chosen": -176.50845336914062,
260
+ "logps/rejected": -175.89736938476562,
261
+ "loss": 0.6765,
262
+ "rewards/accuracies": 0.6399999856948853,
263
+ "rewards/chosen": -0.17112146317958832,
264
+ "rewards/margins": 0.03798893839120865,
265
+ "rewards/rejected": -0.20911039412021637,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.17777777777777778,
270
+ "grad_norm": 4.935380935668945,
271
+ "learning_rate": 4.444444444444444e-06,
272
+ "logits/chosen": -1.399414300918579,
273
+ "logits/rejected": -1.399838924407959,
274
+ "logps/chosen": -176.39724731445312,
275
+ "logps/rejected": -178.42300415039062,
276
+ "loss": 0.6537,
277
+ "rewards/accuracies": 0.7100000381469727,
278
+ "rewards/chosen": -0.25529032945632935,
279
+ "rewards/margins": 0.08817656338214874,
280
+ "rewards/rejected": -0.3434668779373169,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 0.18888888888888888,
285
+ "grad_norm": 1.3383221626281738,
286
+ "learning_rate": 4.722222222222222e-06,
287
+ "logits/chosen": -1.3981242179870605,
288
+ "logits/rejected": -1.398409128189087,
289
+ "logps/chosen": -179.46847534179688,
290
+ "logps/rejected": -177.86688232421875,
291
+ "loss": 0.684,
292
+ "rewards/accuracies": 0.550000011920929,
293
+ "rewards/chosen": -0.40547820925712585,
294
+ "rewards/margins": 0.04951518028974533,
295
+ "rewards/rejected": -0.4549933969974518,
296
+ "step": 170
297
+ },
298
+ {
299
+ "epoch": 0.2,
300
+ "grad_norm": 6.545588493347168,
301
+ "learning_rate": 5e-06,
302
+ "logits/chosen": -1.3984978199005127,
303
+ "logits/rejected": -1.3985638618469238,
304
+ "logps/chosen": -180.9668426513672,
305
+ "logps/rejected": -178.43746948242188,
306
+ "loss": 0.6159,
307
+ "rewards/accuracies": 0.7599999904632568,
308
+ "rewards/chosen": -0.4513840079307556,
309
+ "rewards/margins": 0.18196940422058105,
310
+ "rewards/rejected": -0.6333533525466919,
311
+ "step": 180
312
+ },
313
+ {
314
+ "epoch": 0.2,
315
+ "eval_logits/chosen": -1.4020743370056152,
316
+ "eval_logits/rejected": -1.402461051940918,
317
+ "eval_logps/chosen": -180.4278564453125,
318
+ "eval_logps/rejected": -180.83172607421875,
319
+ "eval_loss": 0.6382298469543457,
320
+ "eval_rewards/accuracies": 0.5610000491142273,
321
+ "eval_rewards/chosen": -0.5398944616317749,
322
+ "eval_rewards/margins": 0.18255746364593506,
323
+ "eval_rewards/rejected": -0.72245192527771,
324
+ "eval_runtime": 319.2836,
325
+ "eval_samples_per_second": 3.132,
326
+ "eval_steps_per_second": 0.313,
327
+ "step": 180
328
+ },
329
+ {
330
+ "epoch": 0.2111111111111111,
331
+ "grad_norm": 2.0203661918640137,
332
+ "learning_rate": 4.999529926121254e-06,
333
+ "logits/chosen": -1.396078109741211,
334
+ "logits/rejected": -1.3954544067382812,
335
+ "logps/chosen": -180.74969482421875,
336
+ "logps/rejected": -182.64613342285156,
337
+ "loss": 0.6337,
338
+ "rewards/accuracies": 0.5700000524520874,
339
+ "rewards/chosen": -0.6385375261306763,
340
+ "rewards/margins": 0.19739526510238647,
341
+ "rewards/rejected": -0.8359327912330627,
342
+ "step": 190
343
+ },
344
+ {
345
+ "epoch": 0.2222222222222222,
346
+ "grad_norm": 5.894029140472412,
347
+ "learning_rate": 4.998119881260576e-06,
348
+ "logits/chosen": -1.390157699584961,
349
+ "logits/rejected": -1.3912606239318848,
350
+ "logps/chosen": -181.57754516601562,
351
+ "logps/rejected": -183.00576782226562,
352
+ "loss": 0.5749,
353
+ "rewards/accuracies": 0.8199999928474426,
354
+ "rewards/chosen": -0.6484101414680481,
355
+ "rewards/margins": 0.3046451807022095,
356
+ "rewards/rejected": -0.9530552625656128,
357
+ "step": 200
358
+ },
359
+ {
360
+ "epoch": 0.23333333333333334,
361
+ "grad_norm": 4.795431613922119,
362
+ "learning_rate": 4.995770395678171e-06,
363
+ "logits/chosen": -1.390209436416626,
364
+ "logits/rejected": -1.3919038772583008,
365
+ "logps/chosen": -181.8658447265625,
366
+ "logps/rejected": -183.79417419433594,
367
+ "loss": 0.5556,
368
+ "rewards/accuracies": 0.75,
369
+ "rewards/chosen": -0.6416223049163818,
370
+ "rewards/margins": 0.4046136736869812,
371
+ "rewards/rejected": -1.0462360382080078,
372
+ "step": 210
373
+ },
374
+ {
375
+ "epoch": 0.24444444444444444,
376
+ "grad_norm": 8.91357421875,
377
+ "learning_rate": 4.99248235291948e-06,
378
+ "logits/chosen": -1.3888887166976929,
379
+ "logits/rejected": -1.3894532918930054,
380
+ "logps/chosen": -179.56829833984375,
381
+ "logps/rejected": -189.20083618164062,
382
+ "loss": 0.4952,
383
+ "rewards/accuracies": 0.800000011920929,
384
+ "rewards/chosen": -0.6512977480888367,
385
+ "rewards/margins": 0.7006000876426697,
386
+ "rewards/rejected": -1.3518978357315063,
387
+ "step": 220
388
+ },
389
+ {
390
+ "epoch": 0.25555555555555554,
391
+ "grad_norm": 14.271614074707031,
392
+ "learning_rate": 4.9882569894829146e-06,
393
+ "logits/chosen": -1.3921380043029785,
394
+ "logits/rejected": -1.393751859664917,
395
+ "logps/chosen": -185.2764892578125,
396
+ "logps/rejected": -192.3001708984375,
397
+ "loss": 0.5098,
398
+ "rewards/accuracies": 0.7599999904632568,
399
+ "rewards/chosen": -1.0151185989379883,
400
+ "rewards/margins": 0.8646041750907898,
401
+ "rewards/rejected": -1.8797227144241333,
402
+ "step": 230
403
+ },
404
+ {
405
+ "epoch": 0.26666666666666666,
406
+ "grad_norm": 2.420156240463257,
407
+ "learning_rate": 4.983095894354858e-06,
408
+ "logits/chosen": -1.39105224609375,
409
+ "logits/rejected": -1.392564296722412,
410
+ "logps/chosen": -186.03451538085938,
411
+ "logps/rejected": -201.23435974121094,
412
+ "loss": 0.368,
413
+ "rewards/accuracies": 0.8300000429153442,
414
+ "rewards/chosen": -1.2737812995910645,
415
+ "rewards/margins": 1.303347110748291,
416
+ "rewards/rejected": -2.5771284103393555,
417
+ "step": 240
418
+ },
419
+ {
420
+ "epoch": 0.26666666666666666,
421
+ "eval_logits/chosen": -1.3971052169799805,
422
+ "eval_logits/rejected": -1.3996238708496094,
423
+ "eval_logps/chosen": -188.56735229492188,
424
+ "eval_logps/rejected": -201.0563201904297,
425
+ "eval_loss": 0.3848608434200287,
426
+ "eval_rewards/accuracies": 0.8309999704360962,
427
+ "eval_rewards/chosen": -1.3538421392440796,
428
+ "eval_rewards/margins": 1.3910682201385498,
429
+ "eval_rewards/rejected": -2.74491024017334,
430
+ "eval_runtime": 319.0097,
431
+ "eval_samples_per_second": 3.135,
432
+ "eval_steps_per_second": 0.313,
433
+ "step": 240
434
+ },
435
+ {
436
+ "epoch": 0.2777777777777778,
437
+ "grad_norm": 14.02056884765625,
438
+ "learning_rate": 4.977001008412113e-06,
439
+ "logits/chosen": -1.3970434665679932,
440
+ "logits/rejected": -1.400298833847046,
441
+ "logps/chosen": -185.9792022705078,
442
+ "logps/rejected": -203.23114013671875,
443
+ "loss": 0.324,
444
+ "rewards/accuracies": 0.8600000143051147,
445
+ "rewards/chosen": -1.1137562990188599,
446
+ "rewards/margins": 1.8328487873077393,
447
+ "rewards/rejected": -2.9466049671173096,
448
+ "step": 250
449
+ },
450
+ {
451
+ "epoch": 0.28888888888888886,
452
+ "grad_norm": 3.589820146560669,
453
+ "learning_rate": 4.969974623692023e-06,
454
+ "logits/chosen": -1.4056309461593628,
455
+ "logits/rejected": -1.4085218906402588,
456
+ "logps/chosen": -185.17918395996094,
457
+ "logps/rejected": -209.30335998535156,
458
+ "loss": 0.2772,
459
+ "rewards/accuracies": 0.8800000548362732,
460
+ "rewards/chosen": -1.051544189453125,
461
+ "rewards/margins": 2.4677376747131348,
462
+ "rewards/rejected": -3.5192818641662598,
463
+ "step": 260
464
+ },
465
+ {
466
+ "epoch": 0.3,
467
+ "grad_norm": 4.202933311462402,
468
+ "learning_rate": 4.962019382530521e-06,
469
+ "logits/chosen": -1.4178866147994995,
470
+ "logits/rejected": -1.4198402166366577,
471
+ "logps/chosen": -191.2581329345703,
472
+ "logps/rejected": -217.56085205078125,
473
+ "loss": 0.2959,
474
+ "rewards/accuracies": 0.8700000047683716,
475
+ "rewards/chosen": -1.7226934432983398,
476
+ "rewards/margins": 2.5767905712127686,
477
+ "rewards/rejected": -4.2994842529296875,
478
+ "step": 270
479
+ },
480
+ {
481
+ "epoch": 0.3111111111111111,
482
+ "grad_norm": 4.351930141448975,
483
+ "learning_rate": 4.953138276568462e-06,
484
+ "logits/chosen": -1.4250727891921997,
485
+ "logits/rejected": -1.427567720413208,
486
+ "logps/chosen": -200.7665557861328,
487
+ "logps/rejected": -221.02357482910156,
488
+ "loss": 0.4344,
489
+ "rewards/accuracies": 0.7900000214576721,
490
+ "rewards/chosen": -2.566577434539795,
491
+ "rewards/margins": 2.1860404014587402,
492
+ "rewards/rejected": -4.752617835998535,
493
+ "step": 280
494
+ },
495
+ {
496
+ "epoch": 0.32222222222222224,
497
+ "grad_norm": 9.703364372253418,
498
+ "learning_rate": 4.943334645626589e-06,
499
+ "logits/chosen": -1.4243228435516357,
500
+ "logits/rejected": -1.4278262853622437,
501
+ "logps/chosen": -197.0714111328125,
502
+ "logps/rejected": -221.6966552734375,
503
+ "loss": 0.3466,
504
+ "rewards/accuracies": 0.8199999928474426,
505
+ "rewards/chosen": -2.2351460456848145,
506
+ "rewards/margins": 2.5265071392059326,
507
+ "rewards/rejected": -4.761653900146484,
508
+ "step": 290
509
+ },
510
+ {
511
+ "epoch": 0.3333333333333333,
512
+ "grad_norm": 14.332489967346191,
513
+ "learning_rate": 4.93261217644956e-06,
514
+ "logits/chosen": -1.4260220527648926,
515
+ "logits/rejected": -1.4290738105773926,
516
+ "logps/chosen": -194.31724548339844,
517
+ "logps/rejected": -221.0859832763672,
518
+ "loss": 0.3234,
519
+ "rewards/accuracies": 0.8800000548362732,
520
+ "rewards/chosen": -2.019387722015381,
521
+ "rewards/margins": 2.6289873123168945,
522
+ "rewards/rejected": -4.648375034332275,
523
+ "step": 300
524
+ },
525
+ {
526
+ "epoch": 0.3333333333333333,
527
+ "eval_logits/chosen": -1.4247881174087524,
528
+ "eval_logits/rejected": -1.4282124042510986,
529
+ "eval_logps/chosen": -196.38650512695312,
530
+ "eval_logps/rejected": -219.71144104003906,
531
+ "eval_loss": 0.3633359372615814,
532
+ "eval_rewards/accuracies": 0.8229999542236328,
533
+ "eval_rewards/chosen": -2.135758876800537,
534
+ "eval_rewards/margins": 2.4746649265289307,
535
+ "eval_rewards/rejected": -4.610424041748047,
536
+ "eval_runtime": 319.0479,
537
+ "eval_samples_per_second": 3.134,
538
+ "eval_steps_per_second": 0.313,
539
+ "step": 300
540
+ },
541
+ {
542
+ "epoch": 0.34444444444444444,
543
+ "grad_norm": 26.149131774902344,
544
+ "learning_rate": 4.9209749013195155e-06,
545
+ "logits/chosen": -1.4286975860595703,
546
+ "logits/rejected": -1.43110990524292,
547
+ "logps/chosen": -191.86825561523438,
548
+ "logps/rejected": -218.36767578125,
549
+ "loss": 0.3799,
550
+ "rewards/accuracies": 0.8300000429153442,
551
+ "rewards/chosen": -1.8485496044158936,
552
+ "rewards/margins": 2.443417549133301,
553
+ "rewards/rejected": -4.291967391967773,
554
+ "step": 310
555
+ },
556
+ {
557
+ "epoch": 0.35555555555555557,
558
+ "grad_norm": 18.254680633544922,
559
+ "learning_rate": 4.908427196539701e-06,
560
+ "logits/chosen": -1.4264110326766968,
561
+ "logits/rejected": -1.4311984777450562,
562
+ "logps/chosen": -196.54238891601562,
563
+ "logps/rejected": -215.0438232421875,
564
+ "loss": 0.3149,
565
+ "rewards/accuracies": 0.8399999737739563,
566
+ "rewards/chosen": -2.009295701980591,
567
+ "rewards/margins": 2.2745771408081055,
568
+ "rewards/rejected": -4.283872604370117,
569
+ "step": 320
570
+ },
571
+ {
572
+ "epoch": 0.36666666666666664,
573
+ "grad_norm": 20.668800354003906,
574
+ "learning_rate": 4.894973780788722e-06,
575
+ "logits/chosen": -1.4264931678771973,
576
+ "logits/rejected": -1.4278137683868408,
577
+ "logps/chosen": -198.57382202148438,
578
+ "logps/rejected": -217.05438232421875,
579
+ "loss": 0.4159,
580
+ "rewards/accuracies": 0.8100000023841858,
581
+ "rewards/chosen": -2.5315957069396973,
582
+ "rewards/margins": 1.5949325561523438,
583
+ "rewards/rejected": -4.126528739929199,
584
+ "step": 330
585
+ },
586
+ {
587
+ "epoch": 0.37777777777777777,
588
+ "grad_norm": 4.467871189117432,
589
+ "learning_rate": 4.8806197133460385e-06,
590
+ "logits/chosen": -1.4277429580688477,
591
+ "logits/rejected": -1.4302550554275513,
592
+ "logps/chosen": -204.53775024414062,
593
+ "logps/rejected": -220.16055297851562,
594
+ "loss": 0.3476,
595
+ "rewards/accuracies": 0.8499999642372131,
596
+ "rewards/chosen": -2.966139316558838,
597
+ "rewards/margins": 1.6775035858154297,
598
+ "rewards/rejected": -4.643642425537109,
599
+ "step": 340
600
+ },
601
+ {
602
+ "epoch": 0.3888888888888889,
603
+ "grad_norm": 7.6644816398620605,
604
+ "learning_rate": 4.865370392189377e-06,
605
+ "logits/chosen": -1.43019437789917,
606
+ "logits/rejected": -1.4324309825897217,
607
+ "logps/chosen": -203.60850524902344,
608
+ "logps/rejected": -224.7152862548828,
609
+ "loss": 0.2798,
610
+ "rewards/accuracies": 0.8700000047683716,
611
+ "rewards/chosen": -2.942948818206787,
612
+ "rewards/margins": 2.063199996948242,
613
+ "rewards/rejected": -5.006148338317871,
614
+ "step": 350
615
+ },
616
+ {
617
+ "epoch": 0.4,
618
+ "grad_norm": 13.925436019897461,
619
+ "learning_rate": 4.849231551964771e-06,
620
+ "logits/chosen": -1.4413893222808838,
621
+ "logits/rejected": -1.4455211162567139,
622
+ "logps/chosen": -205.908447265625,
623
+ "logps/rejected": -226.8455810546875,
624
+ "loss": 0.2649,
625
+ "rewards/accuracies": 0.8700000643730164,
626
+ "rewards/chosen": -3.027750253677368,
627
+ "rewards/margins": 2.361143112182617,
628
+ "rewards/rejected": -5.388893127441406,
629
+ "step": 360
630
+ },
631
+ {
632
+ "epoch": 0.4,
633
+ "eval_logits/chosen": -1.4411193132400513,
634
+ "eval_logits/rejected": -1.4450273513793945,
635
+ "eval_logps/chosen": -208.10166931152344,
636
+ "eval_logps/rejected": -233.96986389160156,
637
+ "eval_loss": 0.3037000298500061,
638
+ "eval_rewards/accuracies": 0.8799999952316284,
639
+ "eval_rewards/chosen": -3.3072755336761475,
640
+ "eval_rewards/margins": 2.7289905548095703,
641
+ "eval_rewards/rejected": -6.036265850067139,
642
+ "eval_runtime": 318.9985,
643
+ "eval_samples_per_second": 3.135,
644
+ "eval_steps_per_second": 0.313,
645
+ "step": 360
646
+ },
647
+ {
648
+ "epoch": 0.4111111111111111,
649
+ "grad_norm": 4.158270835876465,
650
+ "learning_rate": 4.832209261830002e-06,
651
+ "logits/chosen": -1.4424656629562378,
652
+ "logits/rejected": -1.4435977935791016,
653
+ "logps/chosen": -208.0481414794922,
654
+ "logps/rejected": -236.48324584960938,
655
+ "loss": 0.353,
656
+ "rewards/accuracies": 0.8700000047683716,
657
+ "rewards/chosen": -3.5577611923217773,
658
+ "rewards/margins": 2.4773597717285156,
659
+ "rewards/rejected": -6.035120964050293,
660
+ "step": 370
661
+ },
662
+ {
663
+ "epoch": 0.4222222222222222,
664
+ "grad_norm": 3.5422561168670654,
665
+ "learning_rate": 4.814309923172227e-06,
666
+ "logits/chosen": -1.4452104568481445,
667
+ "logits/rejected": -1.4488377571105957,
668
+ "logps/chosen": -204.97947692871094,
669
+ "logps/rejected": -231.20712280273438,
670
+ "loss": 0.3429,
671
+ "rewards/accuracies": 0.8300000429153442,
672
+ "rewards/chosen": -3.058133125305176,
673
+ "rewards/margins": 2.625974178314209,
674
+ "rewards/rejected": -5.684107780456543,
675
+ "step": 380
676
+ },
677
+ {
678
+ "epoch": 0.43333333333333335,
679
+ "grad_norm": 16.114534378051758,
680
+ "learning_rate": 4.7955402672006855e-06,
681
+ "logits/chosen": -1.440530776977539,
682
+ "logits/rejected": -1.4443151950836182,
683
+ "logps/chosen": -205.27835083007812,
684
+ "logps/rejected": -236.82347106933594,
685
+ "loss": 0.2045,
686
+ "rewards/accuracies": 0.9300000071525574,
687
+ "rewards/chosen": -3.113431215286255,
688
+ "rewards/margins": 3.1173110008239746,
689
+ "rewards/rejected": -6.23074197769165,
690
+ "step": 390
691
+ },
692
+ {
693
+ "epoch": 0.4444444444444444,
694
+ "grad_norm": 22.146488189697266,
695
+ "learning_rate": 4.775907352415367e-06,
696
+ "logits/chosen": -1.4472781419754028,
697
+ "logits/rejected": -1.4499727487564087,
698
+ "logps/chosen": -199.02243041992188,
699
+ "logps/rejected": -242.6939697265625,
700
+ "loss": 0.2361,
701
+ "rewards/accuracies": 0.9100000262260437,
702
+ "rewards/chosen": -2.694483757019043,
703
+ "rewards/margins": 3.9368107318878174,
704
+ "rewards/rejected": -6.631294250488281,
705
+ "step": 400
706
+ },
707
+ {
708
+ "epoch": 0.45555555555555555,
709
+ "grad_norm": 16.819496154785156,
710
+ "learning_rate": 4.755418561952595e-06,
711
+ "logits/chosen": -1.4456830024719238,
712
+ "logits/rejected": -1.4492114782333374,
713
+ "logps/chosen": -207.8698272705078,
714
+ "logps/rejected": -238.99583435058594,
715
+ "loss": 0.2863,
716
+ "rewards/accuracies": 0.9100000858306885,
717
+ "rewards/chosen": -3.3276515007019043,
718
+ "rewards/margins": 3.1782994270324707,
719
+ "rewards/rejected": -6.505950927734375,
720
+ "step": 410
721
+ },
722
+ {
723
+ "epoch": 0.4666666666666667,
724
+ "grad_norm": 15.385212898254395,
725
+ "learning_rate": 4.734081600808531e-06,
726
+ "logits/chosen": -1.448960542678833,
727
+ "logits/rejected": -1.4532960653305054,
728
+ "logps/chosen": -210.46075439453125,
729
+ "logps/rejected": -245.5928955078125,
730
+ "loss": 0.1784,
731
+ "rewards/accuracies": 0.9300000667572021,
732
+ "rewards/chosen": -3.5726406574249268,
733
+ "rewards/margins": 3.5739850997924805,
734
+ "rewards/rejected": -7.146625995635986,
735
+ "step": 420
736
+ },
737
+ {
738
+ "epoch": 0.4666666666666667,
739
+ "eval_logits/chosen": -1.4469826221466064,
740
+ "eval_logits/rejected": -1.4523011445999146,
741
+ "eval_logps/chosen": -213.9627685546875,
742
+ "eval_logps/rejected": -244.39593505859375,
743
+ "eval_loss": 0.2159292995929718,
744
+ "eval_rewards/accuracies": 0.9099999666213989,
745
+ "eval_rewards/chosen": -3.8933866024017334,
746
+ "eval_rewards/margins": 3.185485601425171,
747
+ "eval_rewards/rejected": -7.078872203826904,
748
+ "eval_runtime": 319.0594,
749
+ "eval_samples_per_second": 3.134,
750
+ "eval_steps_per_second": 0.313,
751
+ "step": 420
752
+ },
753
+ {
754
+ "epoch": 0.4777777777777778,
755
+ "grad_norm": 35.55814743041992,
756
+ "learning_rate": 4.711904492941644e-06,
757
+ "logits/chosen": -1.4515868425369263,
758
+ "logits/rejected": -1.4541680812835693,
759
+ "logps/chosen": -207.43453979492188,
760
+ "logps/rejected": -246.10247802734375,
761
+ "loss": 0.2279,
762
+ "rewards/accuracies": 0.9099999666213989,
763
+ "rewards/chosen": -3.5352389812469482,
764
+ "rewards/margins": 3.39831805229187,
765
+ "rewards/rejected": -6.933557033538818,
766
+ "step": 430
767
+ },
768
+ {
769
+ "epoch": 0.4888888888888889,
770
+ "grad_norm": 18.41891098022461,
771
+ "learning_rate": 4.688895578255228e-06,
772
+ "logits/chosen": -1.4477709531784058,
773
+ "logits/rejected": -1.4553776979446411,
774
+ "logps/chosen": -215.75033569335938,
775
+ "logps/rejected": -245.9658203125,
776
+ "loss": 0.2779,
777
+ "rewards/accuracies": 0.8600000143051147,
778
+ "rewards/chosen": -3.823634147644043,
779
+ "rewards/margins": 3.665213108062744,
780
+ "rewards/rejected": -7.488846778869629,
781
+ "step": 440
782
+ },
783
+ {
784
+ "epoch": 0.5,
785
+ "grad_norm": 15.392614364624023,
786
+ "learning_rate": 4.665063509461098e-06,
787
+ "logits/chosen": -1.4473040103912354,
788
+ "logits/rejected": -1.4520621299743652,
789
+ "logps/chosen": -212.28256225585938,
790
+ "logps/rejected": -245.33755493164062,
791
+ "loss": 0.2924,
792
+ "rewards/accuracies": 0.89000004529953,
793
+ "rewards/chosen": -3.778430461883545,
794
+ "rewards/margins": 3.308140277862549,
795
+ "rewards/rejected": -7.086570739746094,
796
+ "step": 450
797
+ },
798
+ {
799
+ "epoch": 0.5111111111111111,
800
+ "grad_norm": 19.698705673217773,
801
+ "learning_rate": 4.640417248825667e-06,
802
+ "logits/chosen": -1.4431393146514893,
803
+ "logits/rejected": -1.4465763568878174,
804
+ "logps/chosen": -209.155517578125,
805
+ "logps/rejected": -247.68649291992188,
806
+ "loss": 0.1966,
807
+ "rewards/accuracies": 0.9200000166893005,
808
+ "rewards/chosen": -3.5632858276367188,
809
+ "rewards/margins": 3.6926655769348145,
810
+ "rewards/rejected": -7.255951404571533,
811
+ "step": 460
812
+ },
813
+ {
814
+ "epoch": 0.5222222222222223,
815
+ "grad_norm": 3.17411208152771,
816
+ "learning_rate": 4.614966064799603e-06,
817
+ "logits/chosen": -1.4454569816589355,
818
+ "logits/rejected": -1.4508020877838135,
819
+ "logps/chosen": -214.06642150878906,
820
+ "logps/rejected": -249.29022216796875,
821
+ "loss": 0.1576,
822
+ "rewards/accuracies": 0.9399999976158142,
823
+ "rewards/chosen": -3.891676902770996,
824
+ "rewards/margins": 3.6903645992279053,
825
+ "rewards/rejected": -7.5820417404174805,
826
+ "step": 470
827
+ },
828
+ {
829
+ "epoch": 0.5333333333333333,
830
+ "grad_norm": 3.511045455932617,
831
+ "learning_rate": 4.588719528532342e-06,
832
+ "logits/chosen": -1.4526777267456055,
833
+ "logits/rejected": -1.4565974473953247,
834
+ "logps/chosen": -209.6256103515625,
835
+ "logps/rejected": -252.88116455078125,
836
+ "loss": 0.2608,
837
+ "rewards/accuracies": 0.8700000643730164,
838
+ "rewards/chosen": -3.6902856826782227,
839
+ "rewards/margins": 4.007488250732422,
840
+ "rewards/rejected": -7.6977739334106445,
841
+ "step": 480
842
+ },
843
+ {
844
+ "epoch": 0.5333333333333333,
845
+ "eval_logits/chosen": -1.4509010314941406,
846
+ "eval_logits/rejected": -1.4571257829666138,
847
+ "eval_logps/chosen": -213.10494995117188,
848
+ "eval_logps/rejected": -252.49603271484375,
849
+ "eval_loss": 0.20726382732391357,
850
+ "eval_rewards/accuracies": 0.9099999666213989,
851
+ "eval_rewards/chosen": -3.8076045513153076,
852
+ "eval_rewards/margins": 4.0812788009643555,
853
+ "eval_rewards/rejected": -7.888883590698242,
854
+ "eval_runtime": 319.0436,
855
+ "eval_samples_per_second": 3.134,
856
+ "eval_steps_per_second": 0.313,
857
+ "step": 480
858
+ },
859
+ {
860
+ "epoch": 0.5444444444444444,
861
+ "grad_norm": 35.65738296508789,
862
+ "learning_rate": 4.561687510272767e-06,
863
+ "logits/chosen": -1.4541469812393188,
864
+ "logits/rejected": -1.4597184658050537,
865
+ "logps/chosen": -213.66517639160156,
866
+ "logps/rejected": -254.37350463867188,
867
+ "loss": 0.2904,
868
+ "rewards/accuracies": 0.8899999856948853,
869
+ "rewards/chosen": -4.016324043273926,
870
+ "rewards/margins": 3.9200973510742188,
871
+ "rewards/rejected": -7.9364213943481445,
872
+ "step": 490
873
+ },
874
+ {
875
+ "epoch": 0.5555555555555556,
876
+ "grad_norm": 15.376676559448242,
877
+ "learning_rate": 4.533880175657419e-06,
878
+ "logits/chosen": -1.4524576663970947,
879
+ "logits/rejected": -1.4585695266723633,
880
+ "logps/chosen": -218.01429748535156,
881
+ "logps/rejected": -257.30328369140625,
882
+ "loss": 0.2261,
883
+ "rewards/accuracies": 0.9100000262260437,
884
+ "rewards/chosen": -4.428624153137207,
885
+ "rewards/margins": 3.822225332260132,
886
+ "rewards/rejected": -8.250848770141602,
887
+ "step": 500
888
+ },
889
+ {
890
+ "epoch": 0.5666666666666667,
891
+ "grad_norm": 25.499267578125,
892
+ "learning_rate": 4.50530798188761e-06,
893
+ "logits/chosen": -1.451499342918396,
894
+ "logits/rejected": -1.4615750312805176,
895
+ "logps/chosen": -223.37664794921875,
896
+ "logps/rejected": -253.57177734375,
897
+ "loss": 0.2516,
898
+ "rewards/accuracies": 0.9000000357627869,
899
+ "rewards/chosen": -4.594554424285889,
900
+ "rewards/margins": 3.6208624839782715,
901
+ "rewards/rejected": -8.215417861938477,
902
+ "step": 510
903
+ },
904
+ {
905
+ "epoch": 0.5777777777777777,
906
+ "grad_norm": 42.641754150390625,
907
+ "learning_rate": 4.475981673796899e-06,
908
+ "logits/chosen": -1.4456167221069336,
909
+ "logits/rejected": -1.4504668712615967,
910
+ "logps/chosen": -213.45851135253906,
911
+ "logps/rejected": -259.6695251464844,
912
+ "loss": 0.2521,
913
+ "rewards/accuracies": 0.9200000762939453,
914
+ "rewards/chosen": -4.051717281341553,
915
+ "rewards/margins": 4.357028484344482,
916
+ "rewards/rejected": -8.408745765686035,
917
+ "step": 520
918
+ },
919
+ {
920
+ "epoch": 0.5888888888888889,
921
+ "grad_norm": 26.318056106567383,
922
+ "learning_rate": 4.445912279810401e-06,
923
+ "logits/chosen": -1.4452048540115356,
924
+ "logits/rejected": -1.4490594863891602,
925
+ "logps/chosen": -211.29248046875,
926
+ "logps/rejected": -264.21600341796875,
927
+ "loss": 0.2038,
928
+ "rewards/accuracies": 0.9000000357627869,
929
+ "rewards/chosen": -3.8537445068359375,
930
+ "rewards/margins": 4.980400085449219,
931
+ "rewards/rejected": -8.834144592285156,
932
+ "step": 530
933
+ },
934
+ {
935
+ "epoch": 0.6,
936
+ "grad_norm": 46.37030792236328,
937
+ "learning_rate": 4.415111107797445e-06,
938
+ "logits/chosen": -1.4452967643737793,
939
+ "logits/rejected": -1.448035478591919,
940
+ "logps/chosen": -221.65042114257812,
941
+ "logps/rejected": -268.7168273925781,
942
+ "loss": 0.2459,
943
+ "rewards/accuracies": 0.8399999737739563,
944
+ "rewards/chosen": -4.855015754699707,
945
+ "rewards/margins": 4.4556379318237305,
946
+ "rewards/rejected": -9.310652732849121,
947
+ "step": 540
948
+ },
949
+ {
950
+ "epoch": 0.6,
951
+ "eval_logits/chosen": -1.4478332996368408,
952
+ "eval_logits/rejected": -1.4528884887695312,
953
+ "eval_logps/chosen": -222.76666259765625,
954
+ "eval_logps/rejected": -269.6318664550781,
955
+ "eval_loss": 0.21725089848041534,
956
+ "eval_rewards/accuracies": 0.8889999389648438,
957
+ "eval_rewards/chosen": -4.773774147033691,
958
+ "eval_rewards/margins": 4.828692436218262,
959
+ "eval_rewards/rejected": -9.602466583251953,
960
+ "eval_runtime": 319.0307,
961
+ "eval_samples_per_second": 3.134,
962
+ "eval_steps_per_second": 0.313,
963
+ "step": 540
964
+ },
965
+ {
966
+ "epoch": 0.6111111111111112,
967
+ "grad_norm": 37.16395568847656,
968
+ "learning_rate": 4.3835897408191515e-06,
969
+ "logits/chosen": -1.450826644897461,
970
+ "logits/rejected": -1.4534823894500732,
971
+ "logps/chosen": -222.22439575195312,
972
+ "logps/rejected": -270.947998046875,
973
+ "loss": 0.1905,
974
+ "rewards/accuracies": 0.9300000071525574,
975
+ "rewards/chosen": -4.919099807739258,
976
+ "rewards/margins": 4.604528427124023,
977
+ "rewards/rejected": -9.523628234863281,
978
+ "step": 550
979
+ },
980
+ {
981
+ "epoch": 0.6222222222222222,
982
+ "grad_norm": 26.3408260345459,
983
+ "learning_rate": 4.351360032772512e-06,
984
+ "logits/chosen": -1.4518877267837524,
985
+ "logits/rejected": -1.4572858810424805,
986
+ "logps/chosen": -215.63409423828125,
987
+ "logps/rejected": -271.2196044921875,
988
+ "loss": 0.1935,
989
+ "rewards/accuracies": 0.940000057220459,
990
+ "rewards/chosen": -4.156116962432861,
991
+ "rewards/margins": 5.512393951416016,
992
+ "rewards/rejected": -9.668511390686035,
993
+ "step": 560
994
+ },
995
+ {
996
+ "epoch": 0.6333333333333333,
997
+ "grad_norm": 30.472354888916016,
998
+ "learning_rate": 4.318434103932622e-06,
999
+ "logits/chosen": -1.4471065998077393,
1000
+ "logits/rejected": -1.45332932472229,
1001
+ "logps/chosen": -217.19085693359375,
1002
+ "logps/rejected": -264.91046142578125,
1003
+ "loss": 0.3623,
1004
+ "rewards/accuracies": 0.8700000047683716,
1005
+ "rewards/chosen": -4.126136779785156,
1006
+ "rewards/margins": 5.096201419830322,
1007
+ "rewards/rejected": -9.22233772277832,
1008
+ "step": 570
1009
+ },
1010
+ {
1011
+ "epoch": 0.6444444444444445,
1012
+ "grad_norm": 17.42032814025879,
1013
+ "learning_rate": 4.284824336394748e-06,
1014
+ "logits/chosen": -1.4501465559005737,
1015
+ "logits/rejected": -1.4535834789276123,
1016
+ "logps/chosen": -216.29188537597656,
1017
+ "logps/rejected": -262.982421875,
1018
+ "loss": 0.2146,
1019
+ "rewards/accuracies": 0.9100000262260437,
1020
+ "rewards/chosen": -4.250003814697266,
1021
+ "rewards/margins": 4.562039852142334,
1022
+ "rewards/rejected": -8.812044143676758,
1023
+ "step": 580
1024
+ },
1025
+ {
1026
+ "epoch": 0.6555555555555556,
1027
+ "grad_norm": 8.025737762451172,
1028
+ "learning_rate": 4.250543369417921e-06,
1029
+ "logits/chosen": -1.4417762756347656,
1030
+ "logits/rejected": -1.445784568786621,
1031
+ "logps/chosen": -210.0897216796875,
1032
+ "logps/rejected": -259.0534973144531,
1033
+ "loss": 0.2008,
1034
+ "rewards/accuracies": 0.9000000357627869,
1035
+ "rewards/chosen": -3.6182351112365723,
1036
+ "rewards/margins": 4.80393123626709,
1037
+ "rewards/rejected": -8.42216682434082,
1038
+ "step": 590
1039
+ },
1040
+ {
1041
+ "epoch": 0.6666666666666666,
1042
+ "grad_norm": 47.6915397644043,
1043
+ "learning_rate": 4.215604094671835e-06,
1044
+ "logits/chosen": -1.4405059814453125,
1045
+ "logits/rejected": -1.4476011991500854,
1046
+ "logps/chosen": -208.40203857421875,
1047
+ "logps/rejected": -262.4669189453125,
1048
+ "loss": 0.1729,
1049
+ "rewards/accuracies": 0.9300000071525574,
1050
+ "rewards/chosen": -3.2039127349853516,
1051
+ "rewards/margins": 5.8355712890625,
1052
+ "rewards/rejected": -9.039484024047852,
1053
+ "step": 600
1054
+ },
1055
+ {
1056
+ "epoch": 0.6666666666666666,
1057
+ "eval_logits/chosen": -1.4379254579544067,
1058
+ "eval_logits/rejected": -1.4430339336395264,
1059
+ "eval_logps/chosen": -211.66957092285156,
1060
+ "eval_logps/rejected": -264.79345703125,
1061
+ "eval_loss": 0.22635750472545624,
1062
+ "eval_rewards/accuracies": 0.9199999570846558,
1063
+ "eval_rewards/chosen": -3.664064407348633,
1064
+ "eval_rewards/margins": 5.454564094543457,
1065
+ "eval_rewards/rejected": -9.118627548217773,
1066
+ "eval_runtime": 319.005,
1067
+ "eval_samples_per_second": 3.135,
1068
+ "eval_steps_per_second": 0.313,
1069
+ "step": 600
1070
+ },
1071
+ {
1072
+ "epoch": 0.6777777777777778,
1073
+ "grad_norm": 9.863251686096191,
1074
+ "learning_rate": 4.180019651388807e-06,
1075
+ "logits/chosen": -1.4420831203460693,
1076
+ "logits/rejected": -1.4478440284729004,
1077
+ "logps/chosen": -215.6461181640625,
1078
+ "logps/rejected": -264.3682861328125,
1079
+ "loss": 0.1723,
1080
+ "rewards/accuracies": 0.9100000858306885,
1081
+ "rewards/chosen": -3.9336395263671875,
1082
+ "rewards/margins": 5.252224922180176,
1083
+ "rewards/rejected": -9.185864448547363,
1084
+ "step": 610
1085
+ },
1086
+ {
1087
+ "epoch": 0.6888888888888889,
1088
+ "grad_norm": 26.010082244873047,
1089
+ "learning_rate": 4.14380342142266e-06,
1090
+ "logits/chosen": -1.4423331022262573,
1091
+ "logits/rejected": -1.4474163055419922,
1092
+ "logps/chosen": -207.67831420898438,
1093
+ "logps/rejected": -265.69677734375,
1094
+ "loss": 0.214,
1095
+ "rewards/accuracies": 0.9099999666213989,
1096
+ "rewards/chosen": -3.3267159461975098,
1097
+ "rewards/margins": 5.816192626953125,
1098
+ "rewards/rejected": -9.142909049987793,
1099
+ "step": 620
1100
+ },
1101
+ {
1102
+ "epoch": 0.7,
1103
+ "grad_norm": 23.913930892944336,
1104
+ "learning_rate": 4.106969024216348e-06,
1105
+ "logits/chosen": -1.43362557888031,
1106
+ "logits/rejected": -1.4401135444641113,
1107
+ "logps/chosen": -211.0988311767578,
1108
+ "logps/rejected": -265.10693359375,
1109
+ "loss": 0.4388,
1110
+ "rewards/accuracies": 0.8899999856948853,
1111
+ "rewards/chosen": -3.5427446365356445,
1112
+ "rewards/margins": 5.675654411315918,
1113
+ "rewards/rejected": -9.218399047851562,
1114
+ "step": 630
1115
+ },
1116
+ {
1117
+ "epoch": 0.7111111111111111,
1118
+ "grad_norm": 26.446819305419922,
1119
+ "learning_rate": 4.069530311680247e-06,
1120
+ "logits/chosen": -1.4354360103607178,
1121
+ "logits/rejected": -1.442990779876709,
1122
+ "logps/chosen": -204.5161590576172,
1123
+ "logps/rejected": -251.73101806640625,
1124
+ "loss": 0.2555,
1125
+ "rewards/accuracies": 0.9200000166893005,
1126
+ "rewards/chosen": -2.7997024059295654,
1127
+ "rewards/margins": 5.167999267578125,
1128
+ "rewards/rejected": -7.967701435089111,
1129
+ "step": 640
1130
+ },
1131
+ {
1132
+ "epoch": 0.7222222222222222,
1133
+ "grad_norm": 2.0295379161834717,
1134
+ "learning_rate": 4.031501362983007e-06,
1135
+ "logits/chosen": -1.4334403276443481,
1136
+ "logits/rejected": -1.4392154216766357,
1137
+ "logps/chosen": -205.815673828125,
1138
+ "logps/rejected": -249.6090087890625,
1139
+ "loss": 0.3747,
1140
+ "rewards/accuracies": 0.8800000548362732,
1141
+ "rewards/chosen": -3.0156917572021484,
1142
+ "rewards/margins": 4.648188591003418,
1143
+ "rewards/rejected": -7.663880348205566,
1144
+ "step": 650
1145
+ },
1146
+ {
1147
+ "epoch": 0.7333333333333333,
1148
+ "grad_norm": 22.82501792907715,
1149
+ "learning_rate": 3.992896479256966e-06,
1150
+ "logits/chosen": -1.4355220794677734,
1151
+ "logits/rejected": -1.4445066452026367,
1152
+ "logps/chosen": -205.87745666503906,
1153
+ "logps/rejected": -252.21890258789062,
1154
+ "loss": 0.2136,
1155
+ "rewards/accuracies": 0.9500000476837158,
1156
+ "rewards/chosen": -2.8590097427368164,
1157
+ "rewards/margins": 5.230529308319092,
1158
+ "rewards/rejected": -8.08953857421875,
1159
+ "step": 660
1160
+ },
1161
+ {
1162
+ "epoch": 0.7333333333333333,
1163
+ "eval_logits/chosen": -1.4456157684326172,
1164
+ "eval_logits/rejected": -1.451847791671753,
1165
+ "eval_logps/chosen": -206.54913330078125,
1166
+ "eval_logps/rejected": -253.787353515625,
1167
+ "eval_loss": 0.19935038685798645,
1168
+ "eval_rewards/accuracies": 0.918999969959259,
1169
+ "eval_rewards/chosen": -3.1520204544067383,
1170
+ "eval_rewards/margins": 4.865995407104492,
1171
+ "eval_rewards/rejected": -8.01801586151123,
1172
+ "eval_runtime": 319.1328,
1173
+ "eval_samples_per_second": 3.133,
1174
+ "eval_steps_per_second": 0.313,
1175
+ "step": 660
1176
+ },
1177
+ {
1178
+ "epoch": 0.7444444444444445,
1179
+ "grad_norm": 37.078155517578125,
1180
+ "learning_rate": 3.953730178220067e-06,
1181
+ "logits/chosen": -1.4451912641525269,
1182
+ "logits/rejected": -1.4504950046539307,
1183
+ "logps/chosen": -208.33489990234375,
1184
+ "logps/rejected": -255.33157348632812,
1185
+ "loss": 0.2289,
1186
+ "rewards/accuracies": 0.9199999570846558,
1187
+ "rewards/chosen": -3.3780035972595215,
1188
+ "rewards/margins": 4.752861976623535,
1189
+ "rewards/rejected": -8.130865097045898,
1190
+ "step": 670
1191
+ },
1192
+ {
1193
+ "epoch": 0.7555555555555555,
1194
+ "grad_norm": 14.792739868164062,
1195
+ "learning_rate": 3.914017188716347e-06,
1196
+ "logits/chosen": -1.446117877960205,
1197
+ "logits/rejected": -1.4537690877914429,
1198
+ "logps/chosen": -207.12896728515625,
1199
+ "logps/rejected": -261.03814697265625,
1200
+ "loss": 0.1755,
1201
+ "rewards/accuracies": 0.9399999976158142,
1202
+ "rewards/chosen": -3.137814998626709,
1203
+ "rewards/margins": 5.663388252258301,
1204
+ "rewards/rejected": -8.801202774047852,
1205
+ "step": 680
1206
+ },
1207
+ {
1208
+ "epoch": 0.7666666666666667,
1209
+ "grad_norm": 9.229610443115234,
1210
+ "learning_rate": 3.8737724451770155e-06,
1211
+ "logits/chosen": -1.4443621635437012,
1212
+ "logits/rejected": -1.4512722492218018,
1213
+ "logps/chosen": -215.41629028320312,
1214
+ "logps/rejected": -255.59149169921875,
1215
+ "loss": 0.2433,
1216
+ "rewards/accuracies": 0.8800000548362732,
1217
+ "rewards/chosen": -3.9103140830993652,
1218
+ "rewards/margins": 4.392501354217529,
1219
+ "rewards/rejected": -8.302814483642578,
1220
+ "step": 690
1221
+ },
1222
+ {
1223
+ "epoch": 0.7777777777777778,
1224
+ "grad_norm": 4.114097595214844,
1225
+ "learning_rate": 3.833011082004229e-06,
1226
+ "logits/chosen": -1.4504740238189697,
1227
+ "logits/rejected": -1.4539170265197754,
1228
+ "logps/chosen": -208.27923583984375,
1229
+ "logps/rejected": -259.309326171875,
1230
+ "loss": 0.1322,
1231
+ "rewards/accuracies": 0.940000057220459,
1232
+ "rewards/chosen": -3.5451531410217285,
1233
+ "rewards/margins": 4.7936835289001465,
1234
+ "rewards/rejected": -8.338837623596191,
1235
+ "step": 700
1236
+ },
1237
+ {
1238
+ "epoch": 0.7888888888888889,
1239
+ "grad_norm": 14.269043922424316,
1240
+ "learning_rate": 3.7917484278796578e-06,
1241
+ "logits/chosen": -1.4536712169647217,
1242
+ "logits/rejected": -1.4596309661865234,
1243
+ "logps/chosen": -212.81170654296875,
1244
+ "logps/rejected": -259.4583435058594,
1245
+ "loss": 0.2778,
1246
+ "rewards/accuracies": 0.9100000858306885,
1247
+ "rewards/chosen": -3.7558376789093018,
1248
+ "rewards/margins": 4.881363868713379,
1249
+ "rewards/rejected": -8.637201309204102,
1250
+ "step": 710
1251
+ },
1252
+ {
1253
+ "epoch": 0.8,
1254
+ "grad_norm": 2.647397756576538,
1255
+ "learning_rate": 3.7500000000000005e-06,
1256
+ "logits/chosen": -1.4511842727661133,
1257
+ "logits/rejected": -1.456930160522461,
1258
+ "logps/chosen": -208.67654418945312,
1259
+ "logps/rejected": -263.60205078125,
1260
+ "loss": 0.2148,
1261
+ "rewards/accuracies": 0.940000057220459,
1262
+ "rewards/chosen": -3.4297666549682617,
1263
+ "rewards/margins": 5.496917724609375,
1264
+ "rewards/rejected": -8.926685333251953,
1265
+ "step": 720
1266
+ },
1267
+ {
1268
+ "epoch": 0.8,
1269
+ "eval_logits/chosen": -1.4526758193969727,
1270
+ "eval_logits/rejected": -1.4588308334350586,
1271
+ "eval_logps/chosen": -208.24917602539062,
1272
+ "eval_logps/rejected": -259.9820251464844,
1273
+ "eval_loss": 0.26233014464378357,
1274
+ "eval_rewards/accuracies": 0.9039999842643738,
1275
+ "eval_rewards/chosen": -3.3220245838165283,
1276
+ "eval_rewards/margins": 5.315458297729492,
1277
+ "eval_rewards/rejected": -8.637483596801758,
1278
+ "eval_runtime": 319.0745,
1279
+ "eval_samples_per_second": 3.134,
1280
+ "eval_steps_per_second": 0.313,
1281
+ "step": 720
1282
+ },
1283
+ {
1284
+ "epoch": 0.8111111111111111,
1285
+ "grad_norm": 27.4842472076416,
1286
+ "learning_rate": 3.7077814982415966e-06,
1287
+ "logits/chosen": -1.4542248249053955,
1288
+ "logits/rejected": -1.4581375122070312,
1289
+ "logps/chosen": -201.25257873535156,
1290
+ "logps/rejected": -267.01409912109375,
1291
+ "loss": 0.1524,
1292
+ "rewards/accuracies": 0.9300000071525574,
1293
+ "rewards/chosen": -2.901744842529297,
1294
+ "rewards/margins": 6.153472900390625,
1295
+ "rewards/rejected": -9.055217742919922,
1296
+ "step": 730
1297
+ },
1298
+ {
1299
+ "epoch": 0.8222222222222222,
1300
+ "grad_norm": 17.44131851196289,
1301
+ "learning_rate": 3.665108799256348e-06,
1302
+ "logits/chosen": -1.4501639604568481,
1303
+ "logits/rejected": -1.4550120830535889,
1304
+ "logps/chosen": -215.76513671875,
1305
+ "logps/rejected": -265.45428466796875,
1306
+ "loss": 0.1982,
1307
+ "rewards/accuracies": 0.9200000166893005,
1308
+ "rewards/chosen": -4.081113815307617,
1309
+ "rewards/margins": 5.071871757507324,
1310
+ "rewards/rejected": -9.152984619140625,
1311
+ "step": 740
1312
+ },
1313
+ {
1314
+ "epoch": 0.8333333333333334,
1315
+ "grad_norm": 58.25971221923828,
1316
+ "learning_rate": 3.621997950501156e-06,
1317
+ "logits/chosen": -1.4513449668884277,
1318
+ "logits/rejected": -1.4563398361206055,
1319
+ "logps/chosen": -208.85487365722656,
1320
+ "logps/rejected": -267.5930480957031,
1321
+ "loss": 0.2564,
1322
+ "rewards/accuracies": 0.89000004529953,
1323
+ "rewards/chosen": -3.607893466949463,
1324
+ "rewards/margins": 5.560456275939941,
1325
+ "rewards/rejected": -9.168350219726562,
1326
+ "step": 750
1327
+ },
1328
+ {
1329
+ "epoch": 0.8444444444444444,
1330
+ "grad_norm": 30.51304054260254,
1331
+ "learning_rate": 3.578465164203134e-06,
1332
+ "logits/chosen": -1.454546332359314,
1333
+ "logits/rejected": -1.457871913909912,
1334
+ "logps/chosen": -204.0816650390625,
1335
+ "logps/rejected": -271.85711669921875,
1336
+ "loss": 0.169,
1337
+ "rewards/accuracies": 0.9500000476837158,
1338
+ "rewards/chosen": -3.2631070613861084,
1339
+ "rewards/margins": 6.1964874267578125,
1340
+ "rewards/rejected": -9.4595947265625,
1341
+ "step": 760
1342
+ },
1343
+ {
1344
+ "epoch": 0.8555555555555555,
1345
+ "grad_norm": 28.097698211669922,
1346
+ "learning_rate": 3.5345268112628485e-06,
1347
+ "logits/chosen": -1.4505870342254639,
1348
+ "logits/rejected": -1.457573652267456,
1349
+ "logps/chosen": -215.683349609375,
1350
+ "logps/rejected": -270.27252197265625,
1351
+ "loss": 0.2219,
1352
+ "rewards/accuracies": 0.9300000071525574,
1353
+ "rewards/chosen": -4.015974998474121,
1354
+ "rewards/margins": 5.678750038146973,
1355
+ "rewards/rejected": -9.694725036621094,
1356
+ "step": 770
1357
+ },
1358
+ {
1359
+ "epoch": 0.8666666666666667,
1360
+ "grad_norm": 36.97835159301758,
1361
+ "learning_rate": 3.4901994150978926e-06,
1362
+ "logits/chosen": -1.4549884796142578,
1363
+ "logits/rejected": -1.4569082260131836,
1364
+ "logps/chosen": -204.8563995361328,
1365
+ "logps/rejected": -270.4274597167969,
1366
+ "loss": 0.151,
1367
+ "rewards/accuracies": 0.9600000381469727,
1368
+ "rewards/chosen": -3.443523406982422,
1369
+ "rewards/margins": 5.77408504486084,
1370
+ "rewards/rejected": -9.217609405517578,
1371
+ "step": 780
1372
+ },
1373
+ {
1374
+ "epoch": 0.8666666666666667,
1375
+ "eval_logits/chosen": -1.455579400062561,
1376
+ "eval_logits/rejected": -1.462104320526123,
1377
+ "eval_logps/chosen": -212.8717041015625,
1378
+ "eval_logps/rejected": -266.91241455078125,
1379
+ "eval_loss": 0.26282998919487,
1380
+ "eval_rewards/accuracies": 0.8830000162124634,
1381
+ "eval_rewards/chosen": -3.78427791595459,
1382
+ "eval_rewards/margins": 5.546243190765381,
1383
+ "eval_rewards/rejected": -9.330520629882812,
1384
+ "eval_runtime": 319.1792,
1385
+ "eval_samples_per_second": 3.133,
1386
+ "eval_steps_per_second": 0.313,
1387
+ "step": 780
1388
+ },
1389
+ {
1390
+ "epoch": 0.8777777777777778,
1391
+ "grad_norm": 4.409013748168945,
1392
+ "learning_rate": 3.4454996454291066e-06,
1393
+ "logits/chosen": -1.454880952835083,
1394
+ "logits/rejected": -1.4608569145202637,
1395
+ "logps/chosen": -213.51556396484375,
1396
+ "logps/rejected": -270.1238708496094,
1397
+ "loss": 0.2572,
1398
+ "rewards/accuracies": 0.8999999761581421,
1399
+ "rewards/chosen": -3.862175464630127,
1400
+ "rewards/margins": 5.772583961486816,
1401
+ "rewards/rejected": -9.634759902954102,
1402
+ "step": 790
1403
+ },
1404
+ {
1405
+ "epoch": 0.8888888888888888,
1406
+ "grad_norm": 29.155506134033203,
1407
+ "learning_rate": 3.400444312011776e-06,
1408
+ "logits/chosen": -1.4549602270126343,
1409
+ "logits/rejected": -1.4602875709533691,
1410
+ "logps/chosen": -212.6188201904297,
1411
+ "logps/rejected": -274.49560546875,
1412
+ "loss": 0.1285,
1413
+ "rewards/accuracies": 0.9600000381469727,
1414
+ "rewards/chosen": -3.8824949264526367,
1415
+ "rewards/margins": 6.083772659301758,
1416
+ "rewards/rejected": -9.966266632080078,
1417
+ "step": 800
1418
+ },
1419
+ {
1420
+ "epoch": 0.9,
1421
+ "grad_norm": 28.179977416992188,
1422
+ "learning_rate": 3.3550503583141726e-06,
1423
+ "logits/chosen": -1.4578851461410522,
1424
+ "logits/rejected": -1.4644014835357666,
1425
+ "logps/chosen": -214.60816955566406,
1426
+ "logps/rejected": -270.767822265625,
1427
+ "loss": 0.3057,
1428
+ "rewards/accuracies": 0.8899999856948853,
1429
+ "rewards/chosen": -3.949023723602295,
1430
+ "rewards/margins": 5.775270462036133,
1431
+ "rewards/rejected": -9.724294662475586,
1432
+ "step": 810
1433
+ },
1434
+ {
1435
+ "epoch": 0.9111111111111111,
1436
+ "grad_norm": 22.016096115112305,
1437
+ "learning_rate": 3.3093348551458033e-06,
1438
+ "logits/chosen": -1.4591329097747803,
1439
+ "logits/rejected": -1.464478850364685,
1440
+ "logps/chosen": -206.40281677246094,
1441
+ "logps/rejected": -272.22930908203125,
1442
+ "loss": 0.1286,
1443
+ "rewards/accuracies": 0.9700000286102295,
1444
+ "rewards/chosen": -3.3459863662719727,
1445
+ "rewards/margins": 6.317253112792969,
1446
+ "rewards/rejected": -9.663239479064941,
1447
+ "step": 820
1448
+ },
1449
+ {
1450
+ "epoch": 0.9222222222222223,
1451
+ "grad_norm": 24.308671951293945,
1452
+ "learning_rate": 3.2633149942377835e-06,
1453
+ "logits/chosen": -1.4574294090270996,
1454
+ "logits/rejected": -1.4642754793167114,
1455
+ "logps/chosen": -213.82862854003906,
1456
+ "logps/rejected": -266.60675048828125,
1457
+ "loss": 0.2728,
1458
+ "rewards/accuracies": 0.9000000357627869,
1459
+ "rewards/chosen": -3.8955249786376953,
1460
+ "rewards/margins": 5.386727809906006,
1461
+ "rewards/rejected": -9.282252311706543,
1462
+ "step": 830
1463
+ },
1464
+ {
1465
+ "epoch": 0.9333333333333333,
1466
+ "grad_norm": 18.76812171936035,
1467
+ "learning_rate": 3.217008081777726e-06,
1468
+ "logits/chosen": -1.4542195796966553,
1469
+ "logits/rejected": -1.461412787437439,
1470
+ "logps/chosen": -212.99435424804688,
1471
+ "logps/rejected": -267.50958251953125,
1472
+ "loss": 0.1759,
1473
+ "rewards/accuracies": 0.940000057220459,
1474
+ "rewards/chosen": -3.8036112785339355,
1475
+ "rewards/margins": 5.557330131530762,
1476
+ "rewards/rejected": -9.360941886901855,
1477
+ "step": 840
1478
+ },
1479
+ {
1480
+ "epoch": 0.9333333333333333,
1481
+ "eval_logits/chosen": -1.4564862251281738,
1482
+ "eval_logits/rejected": -1.463136911392212,
1483
+ "eval_logps/chosen": -212.54718017578125,
1484
+ "eval_logps/rejected": -267.1683349609375,
1485
+ "eval_loss": 0.17360562086105347,
1486
+ "eval_rewards/accuracies": 0.9269999861717224,
1487
+ "eval_rewards/chosen": -3.751824378967285,
1488
+ "eval_rewards/margins": 5.604288101196289,
1489
+ "eval_rewards/rejected": -9.35611343383789,
1490
+ "eval_runtime": 319.0169,
1491
+ "eval_samples_per_second": 3.135,
1492
+ "eval_steps_per_second": 0.313,
1493
+ "step": 840
1494
+ },
1495
+ {
1496
+ "epoch": 0.9444444444444444,
1497
+ "grad_norm": 7.19240665435791,
1498
+ "learning_rate": 3.1704315319015936e-06,
1499
+ "logits/chosen": -1.4580819606781006,
1500
+ "logits/rejected": -1.46415114402771,
1501
+ "logps/chosen": -211.7685546875,
1502
+ "logps/rejected": -267.0213623046875,
1503
+ "loss": 0.2128,
1504
+ "rewards/accuracies": 0.9100000262260437,
1505
+ "rewards/chosen": -3.7857413291931152,
1506
+ "rewards/margins": 5.433224678039551,
1507
+ "rewards/rejected": -9.218965530395508,
1508
+ "step": 850
1509
+ },
1510
+ {
1511
+ "epoch": 0.9555555555555556,
1512
+ "grad_norm": 36.987693786621094,
1513
+ "learning_rate": 3.1236028601449534e-06,
1514
+ "logits/chosen": -1.457148551940918,
1515
+ "logits/rejected": -1.4629095792770386,
1516
+ "logps/chosen": -213.85028076171875,
1517
+ "logps/rejected": -263.3716735839844,
1518
+ "loss": 0.2345,
1519
+ "rewards/accuracies": 0.8800000548362732,
1520
+ "rewards/chosen": -3.9159281253814697,
1521
+ "rewards/margins": 5.010843276977539,
1522
+ "rewards/rejected": -8.92677116394043,
1523
+ "step": 860
1524
+ },
1525
+ {
1526
+ "epoch": 0.9666666666666667,
1527
+ "grad_norm": 3.213857889175415,
1528
+ "learning_rate": 3.0765396768561005e-06,
1529
+ "logits/chosen": -1.4600489139556885,
1530
+ "logits/rejected": -1.4643452167510986,
1531
+ "logps/chosen": -207.65179443359375,
1532
+ "logps/rejected": -265.60382080078125,
1533
+ "loss": 0.1257,
1534
+ "rewards/accuracies": 0.940000057220459,
1535
+ "rewards/chosen": -3.5244479179382324,
1536
+ "rewards/margins": 5.42505407333374,
1537
+ "rewards/rejected": -8.949502944946289,
1538
+ "step": 870
1539
+ },
1540
+ {
1541
+ "epoch": 0.9777777777777777,
1542
+ "grad_norm": 2.7685673236846924,
1543
+ "learning_rate": 3.0292596805735275e-06,
1544
+ "logits/chosen": -1.4531805515289307,
1545
+ "logits/rejected": -1.4613621234893799,
1546
+ "logps/chosen": -207.08041381835938,
1547
+ "logps/rejected": -272.2119140625,
1548
+ "loss": 0.0729,
1549
+ "rewards/accuracies": 0.9600000381469727,
1550
+ "rewards/chosen": -3.164515495300293,
1551
+ "rewards/margins": 6.724908351898193,
1552
+ "rewards/rejected": -9.889423370361328,
1553
+ "step": 880
1554
+ },
1555
+ {
1556
+ "epoch": 0.9888888888888889,
1557
+ "grad_norm": 32.784828186035156,
1558
+ "learning_rate": 2.9817806513702247e-06,
1559
+ "logits/chosen": -1.4549615383148193,
1560
+ "logits/rejected": -1.4622005224227905,
1561
+ "logps/chosen": -208.28564453125,
1562
+ "logps/rejected": -271.87994384765625,
1563
+ "loss": 0.261,
1564
+ "rewards/accuracies": 0.9000000357627869,
1565
+ "rewards/chosen": -3.400259494781494,
1566
+ "rewards/margins": 6.355001449584961,
1567
+ "rewards/rejected": -9.755260467529297,
1568
+ "step": 890
1569
+ },
1570
+ {
1571
+ "epoch": 1.0,
1572
+ "grad_norm": 19.346893310546875,
1573
+ "learning_rate": 2.9341204441673267e-06,
1574
+ "logits/chosen": -1.4544117450714111,
1575
+ "logits/rejected": -1.4625937938690186,
1576
+ "logps/chosen": -213.2257537841797,
1577
+ "logps/rejected": -273.80535888671875,
1578
+ "loss": 0.1455,
1579
+ "rewards/accuracies": 0.9500000476837158,
1580
+ "rewards/chosen": -3.6717934608459473,
1581
+ "rewards/margins": 6.4866108894348145,
1582
+ "rewards/rejected": -10.158405303955078,
1583
+ "step": 900
1584
+ },
1585
+ {
1586
+ "epoch": 1.0,
1587
+ "eval_logits/chosen": -1.4550888538360596,
1588
+ "eval_logits/rejected": -1.4625444412231445,
1589
+ "eval_logps/chosen": -209.57638549804688,
1590
+ "eval_logps/rejected": -274.5335388183594,
1591
+ "eval_loss": 0.19673706591129303,
1592
+ "eval_rewards/accuracies": 0.9290000200271606,
1593
+ "eval_rewards/chosen": -3.454745292663574,
1594
+ "eval_rewards/margins": 6.637889862060547,
1595
+ "eval_rewards/rejected": -10.092636108398438,
1596
+ "eval_runtime": 319.1955,
1597
+ "eval_samples_per_second": 3.133,
1598
+ "eval_steps_per_second": 0.313,
1599
+ "step": 900
1600
+ },
1601
+ {
1602
+ "epoch": 1.011111111111111,
1603
+ "grad_norm": 5.159682273864746,
1604
+ "learning_rate": 2.8862969820196017e-06,
1605
+ "logits/chosen": -1.453148603439331,
1606
+ "logits/rejected": -1.460700511932373,
1607
+ "logps/chosen": -207.94732666015625,
1608
+ "logps/rejected": -275.78265380859375,
1609
+ "loss": 0.1197,
1610
+ "rewards/accuracies": 0.9500000476837158,
1611
+ "rewards/chosen": -3.277247428894043,
1612
+ "rewards/margins": 6.945833683013916,
1613
+ "rewards/rejected": -10.223081588745117,
1614
+ "step": 910
1615
+ },
1616
+ {
1617
+ "epoch": 1.0222222222222221,
1618
+ "grad_norm": 39.852725982666016,
1619
+ "learning_rate": 2.8383282493753282e-06,
1620
+ "logits/chosen": -1.4552119970321655,
1621
+ "logits/rejected": -1.4620335102081299,
1622
+ "logps/chosen": -205.69607543945312,
1623
+ "logps/rejected": -279.0772705078125,
1624
+ "loss": 0.1646,
1625
+ "rewards/accuracies": 0.9500000476837158,
1626
+ "rewards/chosen": -3.194272041320801,
1627
+ "rewards/margins": 7.236158847808838,
1628
+ "rewards/rejected": -10.430431365966797,
1629
+ "step": 920
1630
+ },
1631
+ {
1632
+ "epoch": 1.0333333333333334,
1633
+ "grad_norm": 0.4127664268016815,
1634
+ "learning_rate": 2.7902322853130758e-06,
1635
+ "logits/chosen": -1.4518330097198486,
1636
+ "logits/rejected": -1.4583864212036133,
1637
+ "logps/chosen": -208.1166229248047,
1638
+ "logps/rejected": -273.89801025390625,
1639
+ "loss": 0.1935,
1640
+ "rewards/accuracies": 0.9300000071525574,
1641
+ "rewards/chosen": -3.375034809112549,
1642
+ "rewards/margins": 6.5843119621276855,
1643
+ "rewards/rejected": -9.959346771240234,
1644
+ "step": 930
1645
+ },
1646
+ {
1647
+ "epoch": 1.0444444444444445,
1648
+ "grad_norm": 55.90793991088867,
1649
+ "learning_rate": 2.742027176757948e-06,
1650
+ "logits/chosen": -1.4538707733154297,
1651
+ "logits/rejected": -1.4589080810546875,
1652
+ "logps/chosen": -207.4318389892578,
1653
+ "logps/rejected": -275.7708740234375,
1654
+ "loss": 0.2136,
1655
+ "rewards/accuracies": 0.9100000262260437,
1656
+ "rewards/chosen": -3.4339537620544434,
1657
+ "rewards/margins": 6.580141544342041,
1658
+ "rewards/rejected": -10.014095306396484,
1659
+ "step": 940
1660
+ },
1661
+ {
1662
+ "epoch": 1.0555555555555556,
1663
+ "grad_norm": 27.653209686279297,
1664
+ "learning_rate": 2.6937310516798276e-06,
1665
+ "logits/chosen": -1.4511687755584717,
1666
+ "logits/rejected": -1.4569811820983887,
1667
+ "logps/chosen": -213.1746368408203,
1668
+ "logps/rejected": -274.05364990234375,
1669
+ "loss": 0.3442,
1670
+ "rewards/accuracies": 0.8800000548362732,
1671
+ "rewards/chosen": -3.9909844398498535,
1672
+ "rewards/margins": 5.839582443237305,
1673
+ "rewards/rejected": -9.83056640625,
1674
+ "step": 950
1675
+ },
1676
+ {
1677
+ "epoch": 1.0666666666666667,
1678
+ "grad_norm": 17.936847686767578,
1679
+ "learning_rate": 2.6453620722761897e-06,
1680
+ "logits/chosen": -1.4525644779205322,
1681
+ "logits/rejected": -1.4593393802642822,
1682
+ "logps/chosen": -210.91744995117188,
1683
+ "logps/rejected": -276.6822814941406,
1684
+ "loss": 0.1456,
1685
+ "rewards/accuracies": 0.9500000476837158,
1686
+ "rewards/chosen": -3.682964324951172,
1687
+ "rewards/margins": 6.5234174728393555,
1688
+ "rewards/rejected": -10.206380844116211,
1689
+ "step": 960
1690
+ },
1691
+ {
1692
+ "epoch": 1.0666666666666667,
1693
+ "eval_logits/chosen": -1.4538413286209106,
1694
+ "eval_logits/rejected": -1.461044430732727,
1695
+ "eval_logps/chosen": -214.53591918945312,
1696
+ "eval_logps/rejected": -277.791259765625,
1697
+ "eval_loss": 0.2036525309085846,
1698
+ "eval_rewards/accuracies": 0.9289999604225159,
1699
+ "eval_rewards/chosen": -3.950699806213379,
1700
+ "eval_rewards/margins": 6.467706680297852,
1701
+ "eval_rewards/rejected": -10.418405532836914,
1702
+ "eval_runtime": 319.0271,
1703
+ "eval_samples_per_second": 3.135,
1704
+ "eval_steps_per_second": 0.313,
1705
+ "step": 960
1706
+ },
1707
+ {
1708
+ "epoch": 1.0777777777777777,
1709
+ "grad_norm": 53.1196403503418,
1710
+ "learning_rate": 2.5969384281420425e-06,
1711
+ "logits/chosen": -1.452633023262024,
1712
+ "logits/rejected": -1.4589219093322754,
1713
+ "logps/chosen": -213.8082275390625,
1714
+ "logps/rejected": -272.00054931640625,
1715
+ "loss": 0.2095,
1716
+ "rewards/accuracies": 0.9199999570846558,
1717
+ "rewards/chosen": -3.940258264541626,
1718
+ "rewards/margins": 5.825028896331787,
1719
+ "rewards/rejected": -9.765287399291992,
1720
+ "step": 970
1721
+ },
1722
+ {
1723
+ "epoch": 1.0888888888888888,
1724
+ "grad_norm": 9.145478248596191,
1725
+ "learning_rate": 2.548478329429561e-06,
1726
+ "logits/chosen": -1.4536033868789673,
1727
+ "logits/rejected": -1.4611570835113525,
1728
+ "logps/chosen": -206.41012573242188,
1729
+ "logps/rejected": -274.23272705078125,
1730
+ "loss": 0.2283,
1731
+ "rewards/accuracies": 0.9200000166893005,
1732
+ "rewards/chosen": -3.157097578048706,
1733
+ "rewards/margins": 6.88623046875,
1734
+ "rewards/rejected": -10.043328285217285,
1735
+ "step": 980
1736
+ },
1737
+ {
1738
+ "epoch": 1.1,
1739
+ "grad_norm": 26.69437026977539,
1740
+ "learning_rate": 2.5e-06,
1741
+ "logits/chosen": -1.452072024345398,
1742
+ "logits/rejected": -1.460184097290039,
1743
+ "logps/chosen": -215.56329345703125,
1744
+ "logps/rejected": -278.34051513671875,
1745
+ "loss": 0.2056,
1746
+ "rewards/accuracies": 0.9099999666213989,
1747
+ "rewards/chosen": -4.010292053222656,
1748
+ "rewards/margins": 6.502901077270508,
1749
+ "rewards/rejected": -10.513193130493164,
1750
+ "step": 990
1751
+ },
1752
+ {
1753
+ "epoch": 1.1111111111111112,
1754
+ "grad_norm": 26.09144401550293,
1755
+ "learning_rate": 2.4515216705704396e-06,
1756
+ "logits/chosen": -1.4517230987548828,
1757
+ "logits/rejected": -1.4599707126617432,
1758
+ "logps/chosen": -214.8649444580078,
1759
+ "logps/rejected": -274.9700927734375,
1760
+ "loss": 0.2523,
1761
+ "rewards/accuracies": 0.8999999761581421,
1762
+ "rewards/chosen": -3.9132699966430664,
1763
+ "rewards/margins": 6.279613494873047,
1764
+ "rewards/rejected": -10.192882537841797,
1765
+ "step": 1000
1766
+ },
1767
+ {
1768
+ "epoch": 1.1222222222222222,
1769
+ "grad_norm": 25.6414852142334,
1770
+ "learning_rate": 2.403061571857958e-06,
1771
+ "logits/chosen": -1.4531188011169434,
1772
+ "logits/rejected": -1.460578441619873,
1773
+ "logps/chosen": -206.95849609375,
1774
+ "logps/rejected": -270.173583984375,
1775
+ "loss": 0.1519,
1776
+ "rewards/accuracies": 0.9500000476837158,
1777
+ "rewards/chosen": -3.1788997650146484,
1778
+ "rewards/margins": 6.496912002563477,
1779
+ "rewards/rejected": -9.675811767578125,
1780
+ "step": 1010
1781
+ },
1782
+ {
1783
+ "epoch": 1.1333333333333333,
1784
+ "grad_norm": 18.907466888427734,
1785
+ "learning_rate": 2.3546379277238107e-06,
1786
+ "logits/chosen": -1.4472074508666992,
1787
+ "logits/rejected": -1.4559324979782104,
1788
+ "logps/chosen": -208.98306274414062,
1789
+ "logps/rejected": -275.3448486328125,
1790
+ "loss": 0.1276,
1791
+ "rewards/accuracies": 0.940000057220459,
1792
+ "rewards/chosen": -3.3369410037994385,
1793
+ "rewards/margins": 6.897830486297607,
1794
+ "rewards/rejected": -10.234771728515625,
1795
+ "step": 1020
1796
+ },
1797
+ {
1798
+ "epoch": 1.1333333333333333,
1799
+ "eval_logits/chosen": -1.4493515491485596,
1800
+ "eval_logits/rejected": -1.4567832946777344,
1801
+ "eval_logps/chosen": -212.98690795898438,
1802
+ "eval_logps/rejected": -277.5372619628906,
1803
+ "eval_loss": 0.20899365842342377,
1804
+ "eval_rewards/accuracies": 0.9240000247955322,
1805
+ "eval_rewards/chosen": -3.7957983016967773,
1806
+ "eval_rewards/margins": 6.597206115722656,
1807
+ "eval_rewards/rejected": -10.39300537109375,
1808
+ "eval_runtime": 319.0554,
1809
+ "eval_samples_per_second": 3.134,
1810
+ "eval_steps_per_second": 0.313,
1811
+ "step": 1020
1812
+ },
1813
+ {
1814
+ "epoch": 1.1444444444444444,
1815
+ "grad_norm": 6.1386847496032715,
1816
+ "learning_rate": 2.3062689483201732e-06,
1817
+ "logits/chosen": -1.449528694152832,
1818
+ "logits/rejected": -1.4584475755691528,
1819
+ "logps/chosen": -210.4915771484375,
1820
+ "logps/rejected": -280.85870361328125,
1821
+ "loss": 0.0659,
1822
+ "rewards/accuracies": 0.9800000190734863,
1823
+ "rewards/chosen": -3.4778530597686768,
1824
+ "rewards/margins": 7.316442966461182,
1825
+ "rewards/rejected": -10.794296264648438,
1826
+ "step": 1030
1827
+ },
1828
+ {
1829
+ "epoch": 1.1555555555555554,
1830
+ "grad_norm": 0.5934897065162659,
1831
+ "learning_rate": 2.2579728232420524e-06,
1832
+ "logits/chosen": -1.450500726699829,
1833
+ "logits/rejected": -1.4581060409545898,
1834
+ "logps/chosen": -204.58624267578125,
1835
+ "logps/rejected": -282.1773681640625,
1836
+ "loss": 0.0935,
1837
+ "rewards/accuracies": 0.9500000476837158,
1838
+ "rewards/chosen": -3.075925350189209,
1839
+ "rewards/margins": 7.654919624328613,
1840
+ "rewards/rejected": -10.73084545135498,
1841
+ "step": 1040
1842
+ },
1843
+ {
1844
+ "epoch": 1.1666666666666667,
1845
+ "grad_norm": 36.4771614074707,
1846
+ "learning_rate": 2.2097677146869242e-06,
1847
+ "logits/chosen": -1.452951431274414,
1848
+ "logits/rejected": -1.4590579271316528,
1849
+ "logps/chosen": -204.48294067382812,
1850
+ "logps/rejected": -279.20623779296875,
1851
+ "loss": 0.3168,
1852
+ "rewards/accuracies": 0.9100000858306885,
1853
+ "rewards/chosen": -3.147733449935913,
1854
+ "rewards/margins": 7.201406002044678,
1855
+ "rewards/rejected": -10.349140167236328,
1856
+ "step": 1050
1857
+ },
1858
+ {
1859
+ "epoch": 1.1777777777777778,
1860
+ "grad_norm": 3.764472007751465,
1861
+ "learning_rate": 2.161671750624673e-06,
1862
+ "logits/chosen": -1.453149437904358,
1863
+ "logits/rejected": -1.4607138633728027,
1864
+ "logps/chosen": -213.9553985595703,
1865
+ "logps/rejected": -279.18658447265625,
1866
+ "loss": 0.0952,
1867
+ "rewards/accuracies": 0.9500000476837158,
1868
+ "rewards/chosen": -3.8566091060638428,
1869
+ "rewards/margins": 6.743518829345703,
1870
+ "rewards/rejected": -10.600127220153809,
1871
+ "step": 1060
1872
+ },
1873
+ {
1874
+ "epoch": 1.1888888888888889,
1875
+ "grad_norm": 42.325042724609375,
1876
+ "learning_rate": 2.113703017980399e-06,
1877
+ "logits/chosen": -1.4516856670379639,
1878
+ "logits/rejected": -1.4586374759674072,
1879
+ "logps/chosen": -212.69400024414062,
1880
+ "logps/rejected": -279.00396728515625,
1881
+ "loss": 0.1345,
1882
+ "rewards/accuracies": 0.9300000667572021,
1883
+ "rewards/chosen": -3.775832176208496,
1884
+ "rewards/margins": 6.73020076751709,
1885
+ "rewards/rejected": -10.506032943725586,
1886
+ "step": 1070
1887
+ },
1888
+ {
1889
+ "epoch": 1.2,
1890
+ "grad_norm": 11.120525360107422,
1891
+ "learning_rate": 2.0658795558326745e-06,
1892
+ "logits/chosen": -1.4495866298675537,
1893
+ "logits/rejected": -1.4552946090698242,
1894
+ "logps/chosen": -208.491943359375,
1895
+ "logps/rejected": -277.58441162109375,
1896
+ "loss": 0.1768,
1897
+ "rewards/accuracies": 0.9300000667572021,
1898
+ "rewards/chosen": -3.599666118621826,
1899
+ "rewards/margins": 6.546569347381592,
1900
+ "rewards/rejected": -10.146235466003418,
1901
+ "step": 1080
1902
+ },
1903
+ {
1904
+ "epoch": 1.2,
1905
+ "eval_logits/chosen": -1.4486984014511108,
1906
+ "eval_logits/rejected": -1.456477403640747,
1907
+ "eval_logps/chosen": -212.42550659179688,
1908
+ "eval_logps/rejected": -281.87176513671875,
1909
+ "eval_loss": 0.17441098392009735,
1910
+ "eval_rewards/accuracies": 0.934999942779541,
1911
+ "eval_rewards/chosen": -3.739656925201416,
1912
+ "eval_rewards/margins": 7.086799621582031,
1913
+ "eval_rewards/rejected": -10.826456069946289,
1914
+ "eval_runtime": 319.0538,
1915
+ "eval_samples_per_second": 3.134,
1916
+ "eval_steps_per_second": 0.313,
1917
+ "step": 1080
1918
+ },
1919
+ {
1920
+ "epoch": 1.211111111111111,
1921
+ "grad_norm": 0.504275918006897,
1922
+ "learning_rate": 2.0182193486297757e-06,
1923
+ "logits/chosen": -1.4507848024368286,
1924
+ "logits/rejected": -1.457758903503418,
1925
+ "logps/chosen": -210.86541748046875,
1926
+ "logps/rejected": -281.8276062011719,
1927
+ "loss": 0.2981,
1928
+ "rewards/accuracies": 0.8899999856948853,
1929
+ "rewards/chosen": -3.7087669372558594,
1930
+ "rewards/margins": 6.992186546325684,
1931
+ "rewards/rejected": -10.700953483581543,
1932
+ "step": 1090
1933
+ },
1934
+ {
1935
+ "epoch": 1.2222222222222223,
1936
+ "grad_norm": 38.076751708984375,
1937
+ "learning_rate": 1.970740319426474e-06,
1938
+ "logits/chosen": -1.4482132196426392,
1939
+ "logits/rejected": -1.4552950859069824,
1940
+ "logps/chosen": -210.63734436035156,
1941
+ "logps/rejected": -281.1439514160156,
1942
+ "loss": 0.1044,
1943
+ "rewards/accuracies": 0.9500000476837158,
1944
+ "rewards/chosen": -3.595562219619751,
1945
+ "rewards/margins": 7.117644786834717,
1946
+ "rewards/rejected": -10.713207244873047,
1947
+ "step": 1100
1948
+ },
1949
+ {
1950
+ "epoch": 1.2333333333333334,
1951
+ "grad_norm": 33.685394287109375,
1952
+ "learning_rate": 1.9234603231439e-06,
1953
+ "logits/chosen": -1.4512310028076172,
1954
+ "logits/rejected": -1.4559197425842285,
1955
+ "logps/chosen": -212.38308715820312,
1956
+ "logps/rejected": -278.6757507324219,
1957
+ "loss": 0.1273,
1958
+ "rewards/accuracies": 0.9300000667572021,
1959
+ "rewards/chosen": -3.874723434448242,
1960
+ "rewards/margins": 6.4788641929626465,
1961
+ "rewards/rejected": -10.353588104248047,
1962
+ "step": 1110
1963
+ },
1964
+ {
1965
+ "epoch": 1.2444444444444445,
1966
+ "grad_norm": 3.557368755340576,
1967
+ "learning_rate": 1.876397139855047e-06,
1968
+ "logits/chosen": -1.4465047121047974,
1969
+ "logits/rejected": -1.4536259174346924,
1970
+ "logps/chosen": -216.92788696289062,
1971
+ "logps/rejected": -283.7412109375,
1972
+ "loss": 0.1901,
1973
+ "rewards/accuracies": 0.9000000357627869,
1974
+ "rewards/chosen": -4.245588302612305,
1975
+ "rewards/margins": 6.73915958404541,
1976
+ "rewards/rejected": -10.984746932983398,
1977
+ "step": 1120
1978
+ },
1979
+ {
1980
+ "epoch": 1.2555555555555555,
1981
+ "grad_norm": 7.008020401000977,
1982
+ "learning_rate": 1.8295684680984064e-06,
1983
+ "logits/chosen": -1.4479541778564453,
1984
+ "logits/rejected": -1.4568493366241455,
1985
+ "logps/chosen": -216.7061309814453,
1986
+ "logps/rejected": -287.44635009765625,
1987
+ "loss": 0.1335,
1988
+ "rewards/accuracies": 0.9300000071525574,
1989
+ "rewards/chosen": -4.068508148193359,
1990
+ "rewards/margins": 7.39614725112915,
1991
+ "rewards/rejected": -11.464654922485352,
1992
+ "step": 1130
1993
+ },
1994
+ {
1995
+ "epoch": 1.2666666666666666,
1996
+ "grad_norm": 0.6432875394821167,
1997
+ "learning_rate": 1.7829919182222752e-06,
1998
+ "logits/chosen": -1.4469690322875977,
1999
+ "logits/rejected": -1.4537835121154785,
2000
+ "logps/chosen": -218.46372985839844,
2001
+ "logps/rejected": -278.9936828613281,
2002
+ "loss": 0.2379,
2003
+ "rewards/accuracies": 0.9200000166893005,
2004
+ "rewards/chosen": -4.34934139251709,
2005
+ "rewards/margins": 6.168898105621338,
2006
+ "rewards/rejected": -10.518239974975586,
2007
+ "step": 1140
2008
+ },
2009
+ {
2010
+ "epoch": 1.2666666666666666,
2011
+ "eval_logits/chosen": -1.4457852840423584,
2012
+ "eval_logits/rejected": -1.4531958103179932,
2013
+ "eval_logps/chosen": -218.02694702148438,
2014
+ "eval_logps/rejected": -284.6993103027344,
2015
+ "eval_loss": 0.1678517907857895,
2016
+ "eval_rewards/accuracies": 0.9259999990463257,
2017
+ "eval_rewards/chosen": -4.299802780151367,
2018
+ "eval_rewards/margins": 6.809408664703369,
2019
+ "eval_rewards/rejected": -11.109211921691895,
2020
+ "eval_runtime": 319.0708,
2021
+ "eval_samples_per_second": 3.134,
2022
+ "eval_steps_per_second": 0.313,
2023
+ "step": 1140
2024
+ },
2025
+ {
2026
+ "epoch": 1.2777777777777777,
2027
+ "grad_norm": 2.2014999389648438,
2028
+ "learning_rate": 1.7366850057622176e-06,
2029
+ "logits/chosen": -1.4449026584625244,
2030
+ "logits/rejected": -1.4521173238754272,
2031
+ "logps/chosen": -222.32373046875,
2032
+ "logps/rejected": -287.82470703125,
2033
+ "loss": 0.124,
2034
+ "rewards/accuracies": 0.9200000166893005,
2035
+ "rewards/chosen": -4.7135515213012695,
2036
+ "rewards/margins": 6.7242326736450195,
2037
+ "rewards/rejected": -11.437784194946289,
2038
+ "step": 1150
2039
+ },
2040
+ {
2041
+ "epoch": 1.2888888888888888,
2042
+ "grad_norm": 7.591695308685303,
2043
+ "learning_rate": 1.6906651448541977e-06,
2044
+ "logits/chosen": -1.44550621509552,
2045
+ "logits/rejected": -1.452260971069336,
2046
+ "logps/chosen": -216.9026641845703,
2047
+ "logps/rejected": -290.4498291015625,
2048
+ "loss": 0.1115,
2049
+ "rewards/accuracies": 0.9600000381469727,
2050
+ "rewards/chosen": -4.313070774078369,
2051
+ "rewards/margins": 7.246844291687012,
2052
+ "rewards/rejected": -11.559915542602539,
2053
+ "step": 1160
2054
+ },
2055
+ {
2056
+ "epoch": 1.3,
2057
+ "grad_norm": 0.5335530042648315,
2058
+ "learning_rate": 1.6449496416858285e-06,
2059
+ "logits/chosen": -1.4446995258331299,
2060
+ "logits/rejected": -1.4526121616363525,
2061
+ "logps/chosen": -217.58978271484375,
2062
+ "logps/rejected": -297.33953857421875,
2063
+ "loss": 0.1353,
2064
+ "rewards/accuracies": 0.9300000667572021,
2065
+ "rewards/chosen": -4.29036283493042,
2066
+ "rewards/margins": 8.026262283325195,
2067
+ "rewards/rejected": -12.316625595092773,
2068
+ "step": 1170
2069
+ },
2070
+ {
2071
+ "epoch": 1.3111111111111111,
2072
+ "grad_norm": 11.03783130645752,
2073
+ "learning_rate": 1.5995556879882246e-06,
2074
+ "logits/chosen": -1.44581937789917,
2075
+ "logits/rejected": -1.45389986038208,
2076
+ "logps/chosen": -220.682373046875,
2077
+ "logps/rejected": -293.34466552734375,
2078
+ "loss": 0.1417,
2079
+ "rewards/accuracies": 0.9500000476837158,
2080
+ "rewards/chosen": -4.592069625854492,
2081
+ "rewards/margins": 7.323507308959961,
2082
+ "rewards/rejected": -11.915576934814453,
2083
+ "step": 1180
2084
+ },
2085
+ {
2086
+ "epoch": 1.3222222222222222,
2087
+ "grad_norm": 47.34039306640625,
2088
+ "learning_rate": 1.5545003545708942e-06,
2089
+ "logits/chosen": -1.443078875541687,
2090
+ "logits/rejected": -1.4502229690551758,
2091
+ "logps/chosen": -221.423583984375,
2092
+ "logps/rejected": -293.61309814453125,
2093
+ "loss": 0.1844,
2094
+ "rewards/accuracies": 0.9200000166893005,
2095
+ "rewards/chosen": -4.658720016479492,
2096
+ "rewards/margins": 7.329789161682129,
2097
+ "rewards/rejected": -11.988508224487305,
2098
+ "step": 1190
2099
+ },
2100
+ {
2101
+ "epoch": 1.3333333333333333,
2102
+ "grad_norm": 7.4648566246032715,
2103
+ "learning_rate": 1.509800584902108e-06,
2104
+ "logits/chosen": -1.445673942565918,
2105
+ "logits/rejected": -1.4527684450149536,
2106
+ "logps/chosen": -214.28665161132812,
2107
+ "logps/rejected": -296.835205078125,
2108
+ "loss": 0.0571,
2109
+ "rewards/accuracies": 0.9700000286102295,
2110
+ "rewards/chosen": -4.044223308563232,
2111
+ "rewards/margins": 8.145161628723145,
2112
+ "rewards/rejected": -12.189384460449219,
2113
+ "step": 1200
2114
+ },
2115
+ {
2116
+ "epoch": 1.3333333333333333,
2117
+ "eval_logits/chosen": -1.4334654808044434,
2118
+ "eval_logits/rejected": -1.4415010213851929,
2119
+ "eval_logps/chosen": -220.21426391601562,
2120
+ "eval_logps/rejected": -297.70947265625,
2121
+ "eval_loss": 0.16259507834911346,
2122
+ "eval_rewards/accuracies": 0.9420000314712524,
2123
+ "eval_rewards/chosen": -4.518533706665039,
2124
+ "eval_rewards/margins": 7.891695022583008,
2125
+ "eval_rewards/rejected": -12.410228729248047,
2126
+ "eval_runtime": 319.1244,
2127
+ "eval_samples_per_second": 3.134,
2128
+ "eval_steps_per_second": 0.313,
2129
+ "step": 1200
2130
+ },
2131
+ {
2132
+ "epoch": 1.3444444444444446,
2133
+ "grad_norm": 55.29027557373047,
2134
+ "learning_rate": 1.4654731887371524e-06,
2135
+ "logits/chosen": -1.4422087669372559,
2136
+ "logits/rejected": -1.447311520576477,
2137
+ "logps/chosen": -219.22817993164062,
2138
+ "logps/rejected": -296.27703857421875,
2139
+ "loss": 0.2174,
2140
+ "rewards/accuracies": 0.9100000262260437,
2141
+ "rewards/chosen": -4.672391414642334,
2142
+ "rewards/margins": 7.32711124420166,
2143
+ "rewards/rejected": -11.999502182006836,
2144
+ "step": 1210
2145
+ },
2146
+ {
2147
+ "epoch": 1.3555555555555556,
2148
+ "grad_norm": 4.867663860321045,
2149
+ "learning_rate": 1.421534835796867e-06,
2150
+ "logits/chosen": -1.441540241241455,
2151
+ "logits/rejected": -1.4484856128692627,
2152
+ "logps/chosen": -218.04440307617188,
2153
+ "logps/rejected": -293.49969482421875,
2154
+ "loss": 0.1568,
2155
+ "rewards/accuracies": 0.9500000476837158,
2156
+ "rewards/chosen": -4.3516693115234375,
2157
+ "rewards/margins": 7.608930587768555,
2158
+ "rewards/rejected": -11.960600852966309,
2159
+ "step": 1220
2160
+ },
2161
+ {
2162
+ "epoch": 1.3666666666666667,
2163
+ "grad_norm": 20.99898910522461,
2164
+ "learning_rate": 1.3780020494988447e-06,
2165
+ "logits/chosen": -1.4409953355789185,
2166
+ "logits/rejected": -1.448107361793518,
2167
+ "logps/chosen": -211.9335174560547,
2168
+ "logps/rejected": -297.6470947265625,
2169
+ "loss": 0.0491,
2170
+ "rewards/accuracies": 0.9800000190734863,
2171
+ "rewards/chosen": -3.8174076080322266,
2172
+ "rewards/margins": 8.474346160888672,
2173
+ "rewards/rejected": -12.291754722595215,
2174
+ "step": 1230
2175
+ },
2176
+ {
2177
+ "epoch": 1.3777777777777778,
2178
+ "grad_norm": 110.80073547363281,
2179
+ "learning_rate": 1.3348912007436538e-06,
2180
+ "logits/chosen": -1.4384217262268066,
2181
+ "logits/rejected": -1.4469711780548096,
2182
+ "logps/chosen": -221.45703125,
2183
+ "logps/rejected": -297.884521484375,
2184
+ "loss": 0.2962,
2185
+ "rewards/accuracies": 0.9000000357627869,
2186
+ "rewards/chosen": -4.6395368576049805,
2187
+ "rewards/margins": 7.799114227294922,
2188
+ "rewards/rejected": -12.438650131225586,
2189
+ "step": 1240
2190
+ },
2191
+ {
2192
+ "epoch": 1.3888888888888888,
2193
+ "grad_norm": 11.451473236083984,
2194
+ "learning_rate": 1.2922185017584038e-06,
2195
+ "logits/chosen": -1.4418102502822876,
2196
+ "logits/rejected": -1.450040340423584,
2197
+ "logps/chosen": -224.67489624023438,
2198
+ "logps/rejected": -294.7509460449219,
2199
+ "loss": 0.2274,
2200
+ "rewards/accuracies": 0.9300000667572021,
2201
+ "rewards/chosen": -4.944138526916504,
2202
+ "rewards/margins": 7.171684265136719,
2203
+ "rewards/rejected": -12.115822792053223,
2204
+ "step": 1250
2205
+ },
2206
+ {
2207
+ "epoch": 1.4,
2208
+ "grad_norm": 49.401031494140625,
2209
+ "learning_rate": 1.2500000000000007e-06,
2210
+ "logits/chosen": -1.4403979778289795,
2211
+ "logits/rejected": -1.4471863508224487,
2212
+ "logps/chosen": -212.93165588378906,
2213
+ "logps/rejected": -292.7206726074219,
2214
+ "loss": 0.1644,
2215
+ "rewards/accuracies": 0.9300000667572021,
2216
+ "rewards/chosen": -3.9882044792175293,
2217
+ "rewards/margins": 7.7179412841796875,
2218
+ "rewards/rejected": -11.706144332885742,
2219
+ "step": 1260
2220
+ },
2221
+ {
2222
+ "epoch": 1.4,
2223
+ "eval_logits/chosen": -1.4410432577133179,
2224
+ "eval_logits/rejected": -1.4497298002243042,
2225
+ "eval_logps/chosen": -218.07644653320312,
2226
+ "eval_logps/rejected": -295.89495849609375,
2227
+ "eval_loss": 0.1613789200782776,
2228
+ "eval_rewards/accuracies": 0.9399999976158142,
2229
+ "eval_rewards/chosen": -4.304754734039307,
2230
+ "eval_rewards/margins": 7.924018383026123,
2231
+ "eval_rewards/rejected": -12.228774070739746,
2232
+ "eval_runtime": 319.1171,
2233
+ "eval_samples_per_second": 3.134,
2234
+ "eval_steps_per_second": 0.313,
2235
+ "step": 1260
2236
+ },
2237
+ {
2238
+ "epoch": 1.411111111111111,
2239
+ "grad_norm": 3.6920242309570312,
2240
+ "learning_rate": 1.2082515721203429e-06,
2241
+ "logits/chosen": -1.439995527267456,
2242
+ "logits/rejected": -1.446855068206787,
2243
+ "logps/chosen": -213.84771728515625,
2244
+ "logps/rejected": -295.51678466796875,
2245
+ "loss": 0.1399,
2246
+ "rewards/accuracies": 0.9300000667572021,
2247
+ "rewards/chosen": -4.055336952209473,
2248
+ "rewards/margins": 7.958105087280273,
2249
+ "rewards/rejected": -12.01344108581543,
2250
+ "step": 1270
2251
+ },
2252
+ {
2253
+ "epoch": 1.4222222222222223,
2254
+ "grad_norm": 52.6472282409668,
2255
+ "learning_rate": 1.1669889179957725e-06,
2256
+ "logits/chosen": -1.4401957988739014,
2257
+ "logits/rejected": -1.4491486549377441,
2258
+ "logps/chosen": -213.06893920898438,
2259
+ "logps/rejected": -299.53369140625,
2260
+ "loss": 0.1568,
2261
+ "rewards/accuracies": 0.9500000476837158,
2262
+ "rewards/chosen": -3.898712396621704,
2263
+ "rewards/margins": 8.592363357543945,
2264
+ "rewards/rejected": -12.49107551574707,
2265
+ "step": 1280
2266
+ },
2267
+ {
2268
+ "epoch": 1.4333333333333333,
2269
+ "grad_norm": 29.101463317871094,
2270
+ "learning_rate": 1.1262275548229852e-06,
2271
+ "logits/chosen": -1.441546082496643,
2272
+ "logits/rejected": -1.4492754936218262,
2273
+ "logps/chosen": -215.4649200439453,
2274
+ "logps/rejected": -297.18133544921875,
2275
+ "loss": 0.1692,
2276
+ "rewards/accuracies": 0.9500000476837158,
2277
+ "rewards/chosen": -4.13901424407959,
2278
+ "rewards/margins": 8.12405776977539,
2279
+ "rewards/rejected": -12.263072967529297,
2280
+ "step": 1290
2281
+ },
2282
+ {
2283
+ "epoch": 1.4444444444444444,
2284
+ "grad_norm": 34.07436752319336,
2285
+ "learning_rate": 1.085982811283654e-06,
2286
+ "logits/chosen": -1.4384413957595825,
2287
+ "logits/rejected": -1.448547601699829,
2288
+ "logps/chosen": -223.9049530029297,
2289
+ "logps/rejected": -299.6859436035156,
2290
+ "loss": 0.2549,
2291
+ "rewards/accuracies": 0.9100000262260437,
2292
+ "rewards/chosen": -4.689306259155273,
2293
+ "rewards/margins": 8.127752304077148,
2294
+ "rewards/rejected": -12.817058563232422,
2295
+ "step": 1300
2296
+ },
2297
+ {
2298
+ "epoch": 1.4555555555555555,
2299
+ "grad_norm": 1.7382193803787231,
2300
+ "learning_rate": 1.0462698217799333e-06,
2301
+ "logits/chosen": -1.4337615966796875,
2302
+ "logits/rejected": -1.4433856010437012,
2303
+ "logps/chosen": -220.635009765625,
2304
+ "logps/rejected": -301.463623046875,
2305
+ "loss": 0.0488,
2306
+ "rewards/accuracies": 0.9900000095367432,
2307
+ "rewards/chosen": -4.439496994018555,
2308
+ "rewards/margins": 8.465967178344727,
2309
+ "rewards/rejected": -12.905464172363281,
2310
+ "step": 1310
2311
+ },
2312
+ {
2313
+ "epoch": 1.4666666666666668,
2314
+ "grad_norm": 24.08198356628418,
2315
+ "learning_rate": 1.0071035207430352e-06,
2316
+ "logits/chosen": -1.4395148754119873,
2317
+ "logits/rejected": -1.4469318389892578,
2318
+ "logps/chosen": -217.70321655273438,
2319
+ "logps/rejected": -301.6554260253906,
2320
+ "loss": 0.3264,
2321
+ "rewards/accuracies": 0.9199999570846558,
2322
+ "rewards/chosen": -4.422707557678223,
2323
+ "rewards/margins": 8.232616424560547,
2324
+ "rewards/rejected": -12.655323028564453,
2325
+ "step": 1320
2326
+ },
2327
+ {
2328
+ "epoch": 1.4666666666666668,
2329
+ "eval_logits/chosen": -1.4390203952789307,
2330
+ "eval_logits/rejected": -1.447505235671997,
2331
+ "eval_logps/chosen": -220.72488403320312,
2332
+ "eval_logps/rejected": -299.20281982421875,
2333
+ "eval_loss": 0.14269497990608215,
2334
+ "eval_rewards/accuracies": 0.9470000267028809,
2335
+ "eval_rewards/chosen": -4.569596290588379,
2336
+ "eval_rewards/margins": 7.98996639251709,
2337
+ "eval_rewards/rejected": -12.559562683105469,
2338
+ "eval_runtime": 319.0786,
2339
+ "eval_samples_per_second": 3.134,
2340
+ "eval_steps_per_second": 0.313,
2341
+ "step": 1320
2342
+ },
2343
+ {
2344
+ "epoch": 1.4777777777777779,
2345
+ "grad_norm": 33.78030014038086,
2346
+ "learning_rate": 9.68498637016993e-07,
2347
+ "logits/chosen": -1.4397677183151245,
2348
+ "logits/rejected": -1.447797417640686,
2349
+ "logps/chosen": -215.69036865234375,
2350
+ "logps/rejected": -304.0943603515625,
2351
+ "loss": 0.0708,
2352
+ "rewards/accuracies": 0.9700000286102295,
2353
+ "rewards/chosen": -4.21912145614624,
2354
+ "rewards/margins": 8.63876724243164,
2355
+ "rewards/rejected": -12.857889175415039,
2356
+ "step": 1330
2357
+ },
2358
+ {
2359
+ "epoch": 1.488888888888889,
2360
+ "grad_norm": 12.473185539245605,
2361
+ "learning_rate": 9.304696883197542e-07,
2362
+ "logits/chosen": -1.4403018951416016,
2363
+ "logits/rejected": -1.4468640089035034,
2364
+ "logps/chosen": -221.52301025390625,
2365
+ "logps/rejected": -301.3069763183594,
2366
+ "loss": 0.1152,
2367
+ "rewards/accuracies": 0.9600000381469727,
2368
+ "rewards/chosen": -4.801990509033203,
2369
+ "rewards/margins": 7.815496921539307,
2370
+ "rewards/rejected": -12.617486953735352,
2371
+ "step": 1340
2372
+ },
2373
+ {
2374
+ "epoch": 1.5,
2375
+ "grad_norm": 8.766846656799316,
2376
+ "learning_rate": 8.930309757836517e-07,
2377
+ "logits/chosen": -1.4371258020401,
2378
+ "logits/rejected": -1.4433132410049438,
2379
+ "logps/chosen": -213.47586059570312,
2380
+ "logps/rejected": -302.5564270019531,
2381
+ "loss": 0.1309,
2382
+ "rewards/accuracies": 0.940000057220459,
2383
+ "rewards/chosen": -4.185782432556152,
2384
+ "rewards/margins": 8.38406753540039,
2385
+ "rewards/rejected": -12.569849014282227,
2386
+ "step": 1350
2387
+ },
2388
+ {
2389
+ "epoch": 1.511111111111111,
2390
+ "grad_norm": 2.927105665206909,
2391
+ "learning_rate": 8.561965785773413e-07,
2392
+ "logits/chosen": -1.4389441013336182,
2393
+ "logits/rejected": -1.4470137357711792,
2394
+ "logps/chosen": -220.51834106445312,
2395
+ "logps/rejected": -297.39044189453125,
2396
+ "loss": 0.1829,
2397
+ "rewards/accuracies": 0.9300000071525574,
2398
+ "rewards/chosen": -4.525027751922607,
2399
+ "rewards/margins": 7.858166694641113,
2400
+ "rewards/rejected": -12.383193969726562,
2401
+ "step": 1360
2402
+ },
2403
+ {
2404
+ "epoch": 1.5222222222222221,
2405
+ "grad_norm": 18.771081924438477,
2406
+ "learning_rate": 8.19980348611194e-07,
2407
+ "logits/chosen": -1.437861680984497,
2408
+ "logits/rejected": -1.4455113410949707,
2409
+ "logps/chosen": -220.85772705078125,
2410
+ "logps/rejected": -301.1795654296875,
2411
+ "loss": 0.2494,
2412
+ "rewards/accuracies": 0.9200000166893005,
2413
+ "rewards/chosen": -4.576181411743164,
2414
+ "rewards/margins": 8.187376022338867,
2415
+ "rewards/rejected": -12.763558387756348,
2416
+ "step": 1370
2417
+ },
2418
+ {
2419
+ "epoch": 1.5333333333333332,
2420
+ "grad_norm": 1.1617432832717896,
2421
+ "learning_rate": 7.843959053281663e-07,
2422
+ "logits/chosen": -1.434956669807434,
2423
+ "logits/rejected": -1.4432401657104492,
2424
+ "logps/chosen": -211.97430419921875,
2425
+ "logps/rejected": -305.6387023925781,
2426
+ "loss": 0.1088,
2427
+ "rewards/accuracies": 0.9700000286102295,
2428
+ "rewards/chosen": -3.911351203918457,
2429
+ "rewards/margins": 9.050506591796875,
2430
+ "rewards/rejected": -12.961858749389648,
2431
+ "step": 1380
2432
+ },
2433
+ {
2434
+ "epoch": 1.5333333333333332,
2435
+ "eval_logits/chosen": -1.4380238056182861,
2436
+ "eval_logits/rejected": -1.4465129375457764,
2437
+ "eval_logps/chosen": -221.4553680419922,
2438
+ "eval_logps/rejected": -301.4556884765625,
2439
+ "eval_loss": 0.1381780505180359,
2440
+ "eval_rewards/accuracies": 0.9509999752044678,
2441
+ "eval_rewards/chosen": -4.642644882202148,
2442
+ "eval_rewards/margins": 8.142204284667969,
2443
+ "eval_rewards/rejected": -12.784847259521484,
2444
+ "eval_runtime": 319.0431,
2445
+ "eval_samples_per_second": 3.134,
2446
+ "eval_steps_per_second": 0.313,
2447
+ "step": 1380
2448
+ },
2449
+ {
2450
+ "epoch": 1.5444444444444443,
2451
+ "grad_norm": 0.9792585968971252,
2452
+ "learning_rate": 7.494566305820788e-07,
2453
+ "logits/chosen": -1.4381271600723267,
2454
+ "logits/rejected": -1.447120189666748,
2455
+ "logps/chosen": -219.04547119140625,
2456
+ "logps/rejected": -302.42169189453125,
2457
+ "loss": 0.0939,
2458
+ "rewards/accuracies": 0.9500000476837158,
2459
+ "rewards/chosen": -4.438849925994873,
2460
+ "rewards/margins": 8.397099494934082,
2461
+ "rewards/rejected": -12.835948944091797,
2462
+ "step": 1390
2463
+ },
2464
+ {
2465
+ "epoch": 1.5555555555555556,
2466
+ "grad_norm": 75.47477722167969,
2467
+ "learning_rate": 7.151756636052529e-07,
2468
+ "logits/chosen": -1.4314817190170288,
2469
+ "logits/rejected": -1.441815972328186,
2470
+ "logps/chosen": -225.7080078125,
2471
+ "logps/rejected": -303.7359313964844,
2472
+ "loss": 0.2658,
2473
+ "rewards/accuracies": 0.9399999976158142,
2474
+ "rewards/chosen": -4.849039554595947,
2475
+ "rewards/margins": 8.38930892944336,
2476
+ "rewards/rejected": -13.238348007202148,
2477
+ "step": 1400
2478
+ },
2479
+ {
2480
+ "epoch": 1.5666666666666667,
2481
+ "grad_norm": 14.793700218200684,
2482
+ "learning_rate": 6.815658960673782e-07,
2483
+ "logits/chosen": -1.4330942630767822,
2484
+ "logits/rejected": -1.4421098232269287,
2485
+ "logps/chosen": -215.4171142578125,
2486
+ "logps/rejected": -309.46636962890625,
2487
+ "loss": 0.1275,
2488
+ "rewards/accuracies": 0.9600000381469727,
2489
+ "rewards/chosen": -4.201850891113281,
2490
+ "rewards/margins": 9.197854995727539,
2491
+ "rewards/rejected": -13.39970588684082,
2492
+ "step": 1410
2493
+ },
2494
+ {
2495
+ "epoch": 1.5777777777777777,
2496
+ "grad_norm": 44.33953857421875,
2497
+ "learning_rate": 6.48639967227489e-07,
2498
+ "logits/chosen": -1.43377685546875,
2499
+ "logits/rejected": -1.4425432682037354,
2500
+ "logps/chosen": -223.57232666015625,
2501
+ "logps/rejected": -300.73687744140625,
2502
+ "loss": 0.1086,
2503
+ "rewards/accuracies": 0.9600000381469727,
2504
+ "rewards/chosen": -4.74373722076416,
2505
+ "rewards/margins": 8.091646194458008,
2506
+ "rewards/rejected": -12.835383415222168,
2507
+ "step": 1420
2508
+ },
2509
+ {
2510
+ "epoch": 1.588888888888889,
2511
+ "grad_norm": 57.546730041503906,
2512
+ "learning_rate": 6.164102591808482e-07,
2513
+ "logits/chosen": -1.436528205871582,
2514
+ "logits/rejected": -1.4442325830459595,
2515
+ "logps/chosen": -223.30726623535156,
2516
+ "logps/rejected": -298.614501953125,
2517
+ "loss": 0.1289,
2518
+ "rewards/accuracies": 0.9500000476837158,
2519
+ "rewards/chosen": -4.813453197479248,
2520
+ "rewards/margins": 7.699684143066406,
2521
+ "rewards/rejected": -12.513137817382812,
2522
+ "step": 1430
2523
+ },
2524
+ {
2525
+ "epoch": 1.6,
2526
+ "grad_norm": 24.603193283081055,
2527
+ "learning_rate": 5.848888922025553e-07,
2528
+ "logits/chosen": -1.435572624206543,
2529
+ "logits/rejected": -1.4410022497177124,
2530
+ "logps/chosen": -222.442626953125,
2531
+ "logps/rejected": -297.5535888671875,
2532
+ "loss": 0.1853,
2533
+ "rewards/accuracies": 0.9399999976158142,
2534
+ "rewards/chosen": -5.006618022918701,
2535
+ "rewards/margins": 7.103509902954102,
2536
+ "rewards/rejected": -12.110126495361328,
2537
+ "step": 1440
2538
+ },
2539
+ {
2540
+ "epoch": 1.6,
2541
+ "eval_logits/chosen": -1.434856653213501,
2542
+ "eval_logits/rejected": -1.4433155059814453,
2543
+ "eval_logps/chosen": -225.01356506347656,
2544
+ "eval_logps/rejected": -305.67608642578125,
2545
+ "eval_loss": 0.1416788399219513,
2546
+ "eval_rewards/accuracies": 0.9490000009536743,
2547
+ "eval_rewards/chosen": -4.998464584350586,
2548
+ "eval_rewards/margins": 8.208425521850586,
2549
+ "eval_rewards/rejected": -13.206890106201172,
2550
+ "eval_runtime": 319.0443,
2551
+ "eval_samples_per_second": 3.134,
2552
+ "eval_steps_per_second": 0.313,
2553
+ "step": 1440
2554
+ },
2555
+ {
2556
+ "epoch": 1.6111111111111112,
2557
+ "grad_norm": 11.052775382995605,
2558
+ "learning_rate": 5.540877201896e-07,
2559
+ "logits/chosen": -1.4346046447753906,
2560
+ "logits/rejected": -1.441970705986023,
2561
+ "logps/chosen": -220.8565216064453,
2562
+ "logps/rejected": -309.455078125,
2563
+ "loss": 0.036,
2564
+ "rewards/accuracies": 1.0,
2565
+ "rewards/chosen": -4.751629829406738,
2566
+ "rewards/margins": 8.649368286132812,
2567
+ "rewards/rejected": -13.40099811553955,
2568
+ "step": 1450
2569
+ },
2570
+ {
2571
+ "epoch": 1.6222222222222222,
2572
+ "grad_norm": 155.28163146972656,
2573
+ "learning_rate": 5.240183262031021e-07,
2574
+ "logits/chosen": -1.4323256015777588,
2575
+ "logits/rejected": -1.4386875629425049,
2576
+ "logps/chosen": -223.56954956054688,
2577
+ "logps/rejected": -303.56964111328125,
2578
+ "loss": 0.1732,
2579
+ "rewards/accuracies": 0.9399999976158142,
2580
+ "rewards/chosen": -5.084565162658691,
2581
+ "rewards/margins": 7.685075759887695,
2582
+ "rewards/rejected": -12.76963996887207,
2583
+ "step": 1460
2584
+ },
2585
+ {
2586
+ "epoch": 1.6333333333333333,
2587
+ "grad_norm": 0.09031402319669724,
2588
+ "learning_rate": 4.946920181123904e-07,
2589
+ "logits/chosen": -1.4354331493377686,
2590
+ "logits/rejected": -1.4433681964874268,
2591
+ "logps/chosen": -218.1959228515625,
2592
+ "logps/rejected": -311.41815185546875,
2593
+ "loss": 0.0411,
2594
+ "rewards/accuracies": 0.9800000190734863,
2595
+ "rewards/chosen": -4.469109535217285,
2596
+ "rewards/margins": 9.170130729675293,
2597
+ "rewards/rejected": -13.639240264892578,
2598
+ "step": 1470
2599
+ },
2600
+ {
2601
+ "epoch": 1.6444444444444444,
2602
+ "grad_norm": 4.468040943145752,
2603
+ "learning_rate": 4.661198243425813e-07,
2604
+ "logits/chosen": -1.4358713626861572,
2605
+ "logits/rejected": -1.4435877799987793,
2606
+ "logps/chosen": -220.62884521484375,
2607
+ "logps/rejected": -308.4977722167969,
2608
+ "loss": 0.1746,
2609
+ "rewards/accuracies": 0.9100000262260437,
2610
+ "rewards/chosen": -4.726615905761719,
2611
+ "rewards/margins": 8.57591724395752,
2612
+ "rewards/rejected": -13.302533149719238,
2613
+ "step": 1480
2614
+ },
2615
+ {
2616
+ "epoch": 1.6555555555555554,
2617
+ "grad_norm": 24.597213745117188,
2618
+ "learning_rate": 4.383124897272331e-07,
2619
+ "logits/chosen": -1.4311268329620361,
2620
+ "logits/rejected": -1.4438539743423462,
2621
+ "logps/chosen": -223.80532836914062,
2622
+ "logps/rejected": -316.3698425292969,
2623
+ "loss": 0.1137,
2624
+ "rewards/accuracies": 0.9500000476837158,
2625
+ "rewards/chosen": -4.6827216148376465,
2626
+ "rewards/margins": 9.79144287109375,
2627
+ "rewards/rejected": -14.474164009094238,
2628
+ "step": 1490
2629
+ },
2630
+ {
2631
+ "epoch": 1.6666666666666665,
2632
+ "grad_norm": 11.660173416137695,
2633
+ "learning_rate": 4.1128047146765936e-07,
2634
+ "logits/chosen": -1.4334840774536133,
2635
+ "logits/rejected": -1.4423227310180664,
2636
+ "logps/chosen": -222.2215118408203,
2637
+ "logps/rejected": -310.5578308105469,
2638
+ "loss": 0.1406,
2639
+ "rewards/accuracies": 0.9700000286102295,
2640
+ "rewards/chosen": -4.7878241539001465,
2641
+ "rewards/margins": 8.839967727661133,
2642
+ "rewards/rejected": -13.627790451049805,
2643
+ "step": 1500
2644
+ },
2645
+ {
2646
+ "epoch": 1.6666666666666665,
2647
+ "eval_logits/chosen": -1.4283111095428467,
2648
+ "eval_logits/rejected": -1.4372782707214355,
2649
+ "eval_logps/chosen": -226.19561767578125,
2650
+ "eval_logps/rejected": -312.00286865234375,
2651
+ "eval_loss": 0.17411097884178162,
2652
+ "eval_rewards/accuracies": 0.9409999847412109,
2653
+ "eval_rewards/chosen": -5.116670608520508,
2654
+ "eval_rewards/margins": 8.722896575927734,
2655
+ "eval_rewards/rejected": -13.839567184448242,
2656
+ "eval_runtime": 319.1464,
2657
+ "eval_samples_per_second": 3.133,
2658
+ "eval_steps_per_second": 0.313,
2659
+ "step": 1500
2660
+ }
2661
+ ],
2662
+ "logging_steps": 10,
2663
+ "max_steps": 1800,
2664
+ "num_input_tokens_seen": 0,
2665
+ "num_train_epochs": 2,
2666
+ "save_steps": 500,
2667
+ "stateful_callbacks": {
2668
+ "TrainerControl": {
2669
+ "args": {
2670
+ "should_epoch_stop": false,
2671
+ "should_evaluate": false,
2672
+ "should_log": false,
2673
+ "should_save": true,
2674
+ "should_training_stop": false
2675
+ },
2676
+ "attributes": {}
2677
+ }
2678
+ },
2679
+ "total_flos": 2.389743103691981e+18,
2680
+ "train_batch_size": 5,
2681
+ "trial_name": null,
2682
+ "trial_params": null
2683
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d11f32108ca68e7f7be84e704fa987417996a33cca0180d79a224d4ab67c5e2
3
+ size 5432
checkpoint-1800/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
checkpoint-1800/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "up_proj",
24
+ "gate_proj",
25
+ "o_proj",
26
+ "k_proj",
27
+ "q_proj",
28
+ "down_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
checkpoint-1800/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e929aec41513f971396ff6eb8d073add48bdab0433a50a2e50924265184fd9
3
+ size 114106856
checkpoint-1800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7476f960e6510cf62a4272d5c522e72606893358f79ff2c62c5351fadb4af16b
3
+ size 228536930
checkpoint-1800/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5db53e6fd9ea942cffb2a503a92101260e99739282b9267864fb9a071f5d9db
3
+ size 14512
checkpoint-1800/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa280337f74d7a7c41c964c05d2e81a3c235047f454f288af7f8093c05aaa6de
3
+ size 14512
checkpoint-1800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccb483a8e7dd5355fed20928eab72aeae44d85edb6fe3657395ddb2a2378d0f4
3
+ size 1064
checkpoint-1800/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-1800/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
3
+ size 17078292
checkpoint-1800/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1800/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d11f32108ca68e7f7be84e704fa987417996a33cca0180d79a224d4ab67c5e2
3
+ size 5432
checkpoint-500/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
checkpoint-500/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-Nemo-Instruct-2407",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "up_proj",
24
+ "gate_proj",
25
+ "o_proj",
26
+ "k_proj",
27
+ "q_proj",
28
+ "down_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
checkpoint-500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cad76816df43f088f194ec9b396545509c6e8461c95b494bfe904ea2d364a16
3
+ size 114106856
checkpoint-500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6144fbd20ddb6fae6e8990c275eb44417f707107757e2dc152065ad5489e65eb
3
+ size 228536930
checkpoint-500/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91bd7f619e4cd37883f469c08e90105c4d218fd82ffc43ae58fa9fdbcc37fce5
3
+ size 14512
checkpoint-500/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0a7593f9ab52bf47328c6d50954dce1fcd69866aa6f5f35851aef7f7af3899
3
+ size 14512
checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95306122c9f694159537f567e93069f7e38f21c7a30dfe2e6afacbda8a7f8777
3
+ size 1064
checkpoint-500/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
3
+ size 17078292