kubernetes-bad committed on
Commit
6f3a34a
·
verified ·
1 Parent(s): f7c403a

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: NewEden/trashdwag
3
+ library_name: peft
4
+ license: other
5
+ tags:
6
+ - llama-factory
7
+ - lora
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: tinymagnum-r2-KTO-r1
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # tinymagnum-r2-KTO-r1
18
+
19
+ This model is a LoRA adapter (PEFT) fine-tuned from [NewEden/trashdwag](https://huggingface.co/NewEden/trashdwag) on the combined_kto.json dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.5003
22
+ - Rewards/chosen: 0.0061
23
+ - Logps/chosen: -12.0862
24
+ - Rewards/rejected: 0.0023
25
+ - Logps/rejected: -16.1405
26
+ - Rewards/margins: 0.0039
27
+ - Kl: 0.0447
28
+
29
+ ## Model description
30
+
31
+ More information needed
32
+
33
+ ## Intended uses & limitations
34
+
35
+ More information needed
36
+
37
+ ## Training and evaluation data
38
+
39
+ More information needed
40
+
41
+ ## Training procedure
42
+
43
+ ### Training hyperparameters
44
+
45
+ The following hyperparameters were used during training:
46
+ - learning_rate: 1e-05
47
+ - train_batch_size: 2
48
+ - eval_batch_size: 2
49
+ - seed: 42
50
+ - distributed_type: multi-GPU
51
+ - num_devices: 2
52
+ - gradient_accumulation_steps: 16
53
+ - total_train_batch_size: 64
54
+ - total_eval_batch_size: 4
55
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
56
+ - lr_scheduler_type: cosine
57
+ - lr_scheduler_warmup_ratio: 0.25
58
+ - num_epochs: 1.0
59
+
60
+ ### Training results
61
+
62
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Logps/chosen | Rewards/rejected | Logps/rejected | Rewards/margins | Kl |
63
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:------------:|:----------------:|:--------------:|:---------------:|:------:|
64
+ | 0.5025 | 0.1078 | 16 | 0.5038 | 0.0004 | -12.1438 | 0.0007 | -16.1563 | -0.0003 | 0.0099 |
65
+ | 0.502 | 0.2157 | 32 | 0.5019 | 0.0033 | -12.1150 | 0.0018 | -16.1450 | 0.0014 | 0.0200 |
66
+ | 0.5026 | 0.3235 | 48 | 0.5013 | 0.0051 | -12.0964 | 0.0027 | -16.1358 | 0.0024 | 0.0335 |
67
+ | 0.5021 | 0.4313 | 64 | 0.5015 | 0.0058 | -12.0893 | 0.0036 | -16.1270 | 0.0022 | 0.0406 |
68
+ | 0.5017 | 0.5392 | 80 | 0.5012 | 0.0064 | -12.0833 | 0.0037 | -16.1265 | 0.0027 | 0.0434 |
69
+ | 0.5003 | 0.6470 | 96 | 0.5007 | 0.0066 | -12.0812 | 0.0032 | -16.1311 | 0.0034 | 0.0431 |
70
+ | 0.4996 | 0.7548 | 112 | 0.5012 | 0.0063 | -12.0846 | 0.0028 | -16.1353 | 0.0035 | 0.0437 |
71
+ | 0.5077 | 0.8627 | 128 | 0.5005 | 0.0063 | -12.0844 | 0.0026 | -16.1374 | 0.0037 | 0.0433 |
72
+ | 0.5012 | 0.9705 | 144 | 0.5004 | 0.0064 | -12.0837 | 0.0023 | -16.1401 | 0.0041 | 0.0431 |
73
+
74
+
75
+ ### Framework versions
76
+
77
+ - PEFT 0.12.0
78
+ - Transformers 4.45.0.dev0
79
+ - Pytorch 2.3.0a0+ebedce2
80
+ - Datasets 2.20.0
81
+ - Tokenizers 0.19.1
adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "NewEden/trashdwag",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 64,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 64,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "gate_proj",
24
+ "up_proj",
25
+ "down_proj",
26
+ "k_proj",
27
+ "q_proj",
28
+ "v_proj",
29
+ "o_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3701a838c5f5c77154041037fd751d07be4e6e5c61b54591106cae778f2b566b
3
+ size 243330248
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9974726200505476,
3
+ "eval_kl": 0.04474467411637306,
4
+ "eval_logps/chosen": -12.086200252242152,
5
+ "eval_logps/rejected": -16.140498082129962,
6
+ "eval_loss": 0.5002864599227905,
7
+ "eval_rewards/chosen": 0.006133331311657824,
8
+ "eval_rewards/margins": 0.003854976257181193,
9
+ "eval_rewards/rejected": 0.0022783550544766312,
10
+ "eval_runtime": 116.8015,
11
+ "eval_samples_per_second": 4.281,
12
+ "eval_steps_per_second": 1.07,
13
+ "total_flos": 2.1537474524636774e+17,
14
+ "train_loss": 0.5015917984214989,
15
+ "train_runtime": 4835.0561,
16
+ "train_samples_per_second": 1.964,
17
+ "train_steps_per_second": 0.031
18
+ }
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9974726200505476,
3
+ "eval_kl": 0.04474467411637306,
4
+ "eval_logps/chosen": -12.086200252242152,
5
+ "eval_logps/rejected": -16.140498082129962,
6
+ "eval_loss": 0.5002864599227905,
7
+ "eval_rewards/chosen": 0.006133331311657824,
8
+ "eval_rewards/margins": 0.003854976257181193,
9
+ "eval_rewards/rejected": 0.0022783550544766312,
10
+ "eval_runtime": 116.8015,
11
+ "eval_samples_per_second": 4.281,
12
+ "eval_steps_per_second": 1.07
13
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
+ "bos_token": {
12
+ "content": "<|begin_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "eos_token": {
19
+ "content": "<|im_end|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<|finetune_right_pad_id|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ }
32
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,2068 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_4|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_5|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_6|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_7|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_8|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_9|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|im_start|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|im_end|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "additional_special_tokens": [
2053
+ "<|im_start|>"
2054
+ ],
2055
+ "bos_token": "<|begin_of_text|>",
2056
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
2057
+ "clean_up_tokenization_spaces": true,
2058
+ "eos_token": "<|im_end|>",
2059
+ "model_input_names": [
2060
+ "input_ids",
2061
+ "attention_mask"
2062
+ ],
2063
+ "model_max_length": 131072,
2064
+ "pad_token": "<|finetune_right_pad_id|>",
2065
+ "padding_side": "right",
2066
+ "split_special_tokens": false,
2067
+ "tokenizer_class": "PreTrainedTokenizerFast"
2068
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9974726200505476,
3
+ "total_flos": 2.1537474524636774e+17,
4
+ "train_loss": 0.5015917984214989,
5
+ "train_runtime": 4835.0561,
6
+ "train_samples_per_second": 1.964,
7
+ "train_steps_per_second": 0.031
8
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 148, "loss": 0.5, "learning_rate": 2.702702702702703e-07, "epoch": 0.006739679865206402, "percentage": 0.68, "elapsed_time": "0:00:23", "remaining_time": "0:58:47", "throughput": "0.00", "total_tokens": 0}
2
+ {"current_steps": 2, "total_steps": 148, "loss": 0.5, "learning_rate": 5.405405405405406e-07, "epoch": 0.013479359730412805, "percentage": 1.35, "elapsed_time": "0:00:49", "remaining_time": "0:59:43", "throughput": "0.00", "total_tokens": 0}
3
+ {"current_steps": 3, "total_steps": 148, "loss": 0.5056, "learning_rate": 8.108108108108109e-07, "epoch": 0.020219039595619208, "percentage": 2.03, "elapsed_time": "0:01:17", "remaining_time": "1:02:19", "throughput": "0.00", "total_tokens": 0}
4
+ {"current_steps": 4, "total_steps": 148, "loss": 0.503, "learning_rate": 1.0810810810810812e-06, "epoch": 0.02695871946082561, "percentage": 2.7, "elapsed_time": "0:01:42", "remaining_time": "1:01:39", "throughput": "0.00", "total_tokens": 0}
5
+ {"current_steps": 5, "total_steps": 148, "loss": 0.5045, "learning_rate": 1.3513513513513515e-06, "epoch": 0.03369839932603201, "percentage": 3.38, "elapsed_time": "0:02:07", "remaining_time": "1:00:57", "throughput": "0.00", "total_tokens": 0}
6
+ {"current_steps": 6, "total_steps": 148, "loss": 0.5043, "learning_rate": 1.6216216216216219e-06, "epoch": 0.040438079191238416, "percentage": 4.05, "elapsed_time": "0:02:35", "remaining_time": "1:01:29", "throughput": "0.00", "total_tokens": 0}
7
+ {"current_steps": 7, "total_steps": 148, "loss": 0.5044, "learning_rate": 1.8918918918918922e-06, "epoch": 0.04717775905644482, "percentage": 4.73, "elapsed_time": "0:03:02", "remaining_time": "1:01:24", "throughput": "0.00", "total_tokens": 0}
8
+ {"current_steps": 8, "total_steps": 148, "loss": 0.5046, "learning_rate": 2.1621621621621623e-06, "epoch": 0.05391743892165122, "percentage": 5.41, "elapsed_time": "0:03:30", "remaining_time": "1:01:31", "throughput": "0.00", "total_tokens": 0}
9
+ {"current_steps": 9, "total_steps": 148, "loss": 0.5041, "learning_rate": 2.432432432432433e-06, "epoch": 0.060657118786857624, "percentage": 6.08, "elapsed_time": "0:03:56", "remaining_time": "1:00:51", "throughput": "0.00", "total_tokens": 0}
10
+ {"current_steps": 10, "total_steps": 148, "loss": 0.5039, "learning_rate": 2.702702702702703e-06, "epoch": 0.06739679865206402, "percentage": 6.76, "elapsed_time": "0:04:23", "remaining_time": "1:00:30", "throughput": "0.00", "total_tokens": 0}
11
+ {"current_steps": 11, "total_steps": 148, "loss": 0.5025, "learning_rate": 2.9729729729729736e-06, "epoch": 0.07413647851727043, "percentage": 7.43, "elapsed_time": "0:04:50", "remaining_time": "1:00:15", "throughput": "0.00", "total_tokens": 0}
12
+ {"current_steps": 12, "total_steps": 148, "loss": 0.5031, "learning_rate": 3.2432432432432437e-06, "epoch": 0.08087615838247683, "percentage": 8.11, "elapsed_time": "0:05:16", "remaining_time": "0:59:43", "throughput": "0.00", "total_tokens": 0}
13
+ {"current_steps": 13, "total_steps": 148, "loss": 0.5036, "learning_rate": 3.513513513513514e-06, "epoch": 0.08761583824768324, "percentage": 8.78, "elapsed_time": "0:05:45", "remaining_time": "0:59:51", "throughput": "0.00", "total_tokens": 0}
14
+ {"current_steps": 14, "total_steps": 148, "loss": 0.5027, "learning_rate": 3.7837837837837844e-06, "epoch": 0.09435551811288964, "percentage": 9.46, "elapsed_time": "0:06:09", "remaining_time": "0:59:00", "throughput": "0.00", "total_tokens": 0}
15
+ {"current_steps": 15, "total_steps": 148, "loss": 0.5047, "learning_rate": 4.0540540540540545e-06, "epoch": 0.10109519797809605, "percentage": 10.14, "elapsed_time": "0:06:33", "remaining_time": "0:58:06", "throughput": "0.00", "total_tokens": 0}
16
+ {"current_steps": 16, "total_steps": 148, "loss": 0.5025, "learning_rate": 4.324324324324325e-06, "epoch": 0.10783487784330244, "percentage": 10.81, "elapsed_time": "0:06:59", "remaining_time": "0:57:42", "throughput": "0.00", "total_tokens": 0}
17
+ {"current_steps": 16, "total_steps": 148, "eval_loss": 0.5038444399833679, "epoch": 0.10783487784330244, "percentage": 10.81, "elapsed_time": "0:08:56", "remaining_time": "1:13:44", "throughput": "0.00", "total_tokens": 0}
18
+ {"current_steps": 17, "total_steps": 148, "loss": 0.502, "learning_rate": 4.594594594594596e-06, "epoch": 0.11457455770850884, "percentage": 11.49, "elapsed_time": "0:09:23", "remaining_time": "1:12:21", "throughput": "0.00", "total_tokens": 0}
19
+ {"current_steps": 18, "total_steps": 148, "loss": 0.5034, "learning_rate": 4.864864864864866e-06, "epoch": 0.12131423757371525, "percentage": 12.16, "elapsed_time": "0:09:46", "remaining_time": "1:10:39", "throughput": "0.00", "total_tokens": 0}
20
+ {"current_steps": 19, "total_steps": 148, "loss": 0.5016, "learning_rate": 5.135135135135135e-06, "epoch": 0.12805391743892164, "percentage": 12.84, "elapsed_time": "0:10:11", "remaining_time": "1:09:10", "throughput": "0.00", "total_tokens": 0}
21
+ {"current_steps": 20, "total_steps": 148, "loss": 0.5043, "learning_rate": 5.405405405405406e-06, "epoch": 0.13479359730412804, "percentage": 13.51, "elapsed_time": "0:10:35", "remaining_time": "1:07:47", "throughput": "0.00", "total_tokens": 0}
22
+ {"current_steps": 21, "total_steps": 148, "loss": 0.5057, "learning_rate": 5.675675675675676e-06, "epoch": 0.14153327716933445, "percentage": 14.19, "elapsed_time": "0:11:01", "remaining_time": "1:06:39", "throughput": "0.00", "total_tokens": 0}
23
+ {"current_steps": 22, "total_steps": 148, "loss": 0.5025, "learning_rate": 5.945945945945947e-06, "epoch": 0.14827295703454085, "percentage": 14.86, "elapsed_time": "0:11:26", "remaining_time": "1:05:34", "throughput": "0.00", "total_tokens": 0}
24
+ {"current_steps": 23, "total_steps": 148, "loss": 0.5057, "learning_rate": 6.2162162162162164e-06, "epoch": 0.15501263689974726, "percentage": 15.54, "elapsed_time": "0:11:50", "remaining_time": "1:04:22", "throughput": "0.00", "total_tokens": 0}
25
+ {"current_steps": 24, "total_steps": 148, "loss": 0.5035, "learning_rate": 6.486486486486487e-06, "epoch": 0.16175231676495366, "percentage": 16.22, "elapsed_time": "0:12:16", "remaining_time": "1:03:25", "throughput": "0.00", "total_tokens": 0}
26
+ {"current_steps": 25, "total_steps": 148, "loss": 0.5027, "learning_rate": 6.7567567567567575e-06, "epoch": 0.16849199663016007, "percentage": 16.89, "elapsed_time": "0:12:42", "remaining_time": "1:02:30", "throughput": "0.00", "total_tokens": 0}
27
+ {"current_steps": 26, "total_steps": 148, "loss": 0.5048, "learning_rate": 7.027027027027028e-06, "epoch": 0.17523167649536647, "percentage": 17.57, "elapsed_time": "0:13:10", "remaining_time": "1:01:48", "throughput": "0.00", "total_tokens": 0}
28
+ {"current_steps": 27, "total_steps": 148, "loss": 0.5033, "learning_rate": 7.297297297297298e-06, "epoch": 0.18197135636057288, "percentage": 18.24, "elapsed_time": "0:13:33", "remaining_time": "1:00:44", "throughput": "0.00", "total_tokens": 0}
29
+ {"current_steps": 28, "total_steps": 148, "loss": 0.5019, "learning_rate": 7.567567567567569e-06, "epoch": 0.18871103622577928, "percentage": 18.92, "elapsed_time": "0:13:57", "remaining_time": "0:59:50", "throughput": "0.00", "total_tokens": 0}
30
+ {"current_steps": 29, "total_steps": 148, "loss": 0.5008, "learning_rate": 7.837837837837838e-06, "epoch": 0.1954507160909857, "percentage": 19.59, "elapsed_time": "0:14:25", "remaining_time": "0:59:09", "throughput": "0.00", "total_tokens": 0}
31
+ {"current_steps": 30, "total_steps": 148, "loss": 0.501, "learning_rate": 8.108108108108109e-06, "epoch": 0.2021903959561921, "percentage": 20.27, "elapsed_time": "0:14:48", "remaining_time": "0:58:16", "throughput": "0.00", "total_tokens": 0}
32
+ {"current_steps": 31, "total_steps": 148, "loss": 0.5081, "learning_rate": 8.378378378378378e-06, "epoch": 0.20893007582139847, "percentage": 20.95, "elapsed_time": "0:15:14", "remaining_time": "0:57:29", "throughput": "0.00", "total_tokens": 0}
33
+ {"current_steps": 32, "total_steps": 148, "loss": 0.502, "learning_rate": 8.64864864864865e-06, "epoch": 0.21566975568660487, "percentage": 21.62, "elapsed_time": "0:15:39", "remaining_time": "0:56:46", "throughput": "0.00", "total_tokens": 0}
34
+ {"current_steps": 32, "total_steps": 148, "eval_loss": 0.5019354224205017, "epoch": 0.21566975568660487, "percentage": 21.62, "elapsed_time": "0:17:36", "remaining_time": "1:03:50", "throughput": "0.00", "total_tokens": 0}
35
+ {"current_steps": 33, "total_steps": 148, "loss": 0.502, "learning_rate": 8.91891891891892e-06, "epoch": 0.22240943555181128, "percentage": 22.3, "elapsed_time": "0:18:00", "remaining_time": "1:02:45", "throughput": "0.00", "total_tokens": 0}
36
+ {"current_steps": 34, "total_steps": 148, "loss": 0.5019, "learning_rate": 9.189189189189191e-06, "epoch": 0.22914911541701768, "percentage": 22.97, "elapsed_time": "0:18:26", "remaining_time": "1:01:50", "throughput": "0.00", "total_tokens": 0}
37
+ {"current_steps": 35, "total_steps": 148, "loss": 0.5022, "learning_rate": 9.45945945945946e-06, "epoch": 0.2358887952822241, "percentage": 23.65, "elapsed_time": "0:18:53", "remaining_time": "1:00:58", "throughput": "0.00", "total_tokens": 0}
38
+ {"current_steps": 36, "total_steps": 148, "loss": 0.5009, "learning_rate": 9.729729729729732e-06, "epoch": 0.2426284751474305, "percentage": 24.32, "elapsed_time": "0:19:18", "remaining_time": "1:00:03", "throughput": "0.00", "total_tokens": 0}
39
+ {"current_steps": 37, "total_steps": 148, "loss": 0.5031, "learning_rate": 1e-05, "epoch": 0.2493681550126369, "percentage": 25.0, "elapsed_time": "0:19:44", "remaining_time": "0:59:13", "throughput": "0.00", "total_tokens": 0}
40
+ {"current_steps": 38, "total_steps": 148, "loss": 0.5015, "learning_rate": 9.99799753559161e-06, "epoch": 0.2561078348778433, "percentage": 25.68, "elapsed_time": "0:20:08", "remaining_time": "0:58:18", "throughput": "0.00", "total_tokens": 0}
41
+ {"current_steps": 39, "total_steps": 148, "loss": 0.5059, "learning_rate": 9.991991746311916e-06, "epoch": 0.2628475147430497, "percentage": 26.35, "elapsed_time": "0:20:34", "remaining_time": "0:57:29", "throughput": "0.00", "total_tokens": 0}
42
+ {"current_steps": 40, "total_steps": 148, "loss": 0.5025, "learning_rate": 9.981987442712634e-06, "epoch": 0.2695871946082561, "percentage": 27.03, "elapsed_time": "0:20:58", "remaining_time": "0:56:37", "throughput": "0.00", "total_tokens": 0}
43
+ {"current_steps": 41, "total_steps": 148, "loss": 0.5005, "learning_rate": 9.967992638098517e-06, "epoch": 0.2763268744734625, "percentage": 27.7, "elapsed_time": "0:21:23", "remaining_time": "0:55:48", "throughput": "0.00", "total_tokens": 0}
44
+ {"current_steps": 42, "total_steps": 148, "loss": 0.5015, "learning_rate": 9.950018542108818e-06, "epoch": 0.2830665543386689, "percentage": 28.38, "elapsed_time": "0:21:47", "remaining_time": "0:55:00", "throughput": "0.00", "total_tokens": 0}
45
+ {"current_steps": 43, "total_steps": 148, "loss": 0.5018, "learning_rate": 9.928079551738542e-06, "epoch": 0.2898062342038753, "percentage": 29.05, "elapsed_time": "0:22:13", "remaining_time": "0:54:17", "throughput": "0.00", "total_tokens": 0}
46
+ {"current_steps": 44, "total_steps": 148, "loss": 0.5004, "learning_rate": 9.902193239806634e-06, "epoch": 0.2965459140690817, "percentage": 29.73, "elapsed_time": "0:22:38", "remaining_time": "0:53:31", "throughput": "0.00", "total_tokens": 0}
47
+ {"current_steps": 45, "total_steps": 148, "loss": 0.5008, "learning_rate": 9.872380340880416e-06, "epoch": 0.3032855939342881, "percentage": 30.41, "elapsed_time": "0:23:06", "remaining_time": "0:52:53", "throughput": "0.00", "total_tokens": 0}
48
+ {"current_steps": 46, "total_steps": 148, "loss": 0.5012, "learning_rate": 9.838664734667496e-06, "epoch": 0.3100252737994945, "percentage": 31.08, "elapsed_time": "0:23:31", "remaining_time": "0:52:09", "throughput": "0.00", "total_tokens": 0}
49
+ {"current_steps": 47, "total_steps": 148, "loss": 0.5007, "learning_rate": 9.801073426888447e-06, "epoch": 0.3167649536647009, "percentage": 31.76, "elapsed_time": "0:23:57", "remaining_time": "0:51:29", "throughput": "0.00", "total_tokens": 0}
50
+ {"current_steps": 48, "total_steps": 148, "loss": 0.5026, "learning_rate": 9.759636527645633e-06, "epoch": 0.3235046335299073, "percentage": 32.43, "elapsed_time": "0:24:23", "remaining_time": "0:50:49", "throughput": "0.00", "total_tokens": 0}
51
+ {"current_steps": 48, "total_steps": 148, "eval_loss": 0.5012729167938232, "epoch": 0.3235046335299073, "percentage": 32.43, "elapsed_time": "0:26:20", "remaining_time": "0:54:53", "throughput": "0.00", "total_tokens": 0}
52
+ {"current_steps": 49, "total_steps": 148, "loss": 0.5007, "learning_rate": 9.714387227305422e-06, "epoch": 0.33024431339511373, "percentage": 33.11, "elapsed_time": "0:26:46", "remaining_time": "0:54:05", "throughput": "0.00", "total_tokens": 0}
53
+ {"current_steps": 50, "total_steps": 148, "loss": 0.5021, "learning_rate": 9.665361769913187e-06, "epoch": 0.33698399326032014, "percentage": 33.78, "elapsed_time": "0:27:12", "remaining_time": "0:53:20", "throughput": "0.00", "total_tokens": 0}
54
+ {"current_steps": 51, "total_steps": 148, "loss": 0.5002, "learning_rate": 9.612599424162344e-06, "epoch": 0.34372367312552654, "percentage": 34.46, "elapsed_time": "0:27:37", "remaining_time": "0:52:33", "throughput": "0.00", "total_tokens": 0}
55
+ {"current_steps": 52, "total_steps": 148, "loss": 0.5083, "learning_rate": 9.55614245194068e-06, "epoch": 0.35046335299073295, "percentage": 35.14, "elapsed_time": "0:28:03", "remaining_time": "0:51:48", "throughput": "0.00", "total_tokens": 0}
56
+ {"current_steps": 53, "total_steps": 148, "loss": 0.5006, "learning_rate": 9.496036074479184e-06, "epoch": 0.35720303285593935, "percentage": 35.81, "elapsed_time": "0:28:27", "remaining_time": "0:51:01", "throughput": "0.00", "total_tokens": 0}
57
+ {"current_steps": 54, "total_steps": 148, "loss": 0.5013, "learning_rate": 9.432328436130493e-06, "epoch": 0.36394271272114576, "percentage": 36.49, "elapsed_time": "0:28:54", "remaining_time": "0:50:19", "throughput": "0.00", "total_tokens": 0}
58
+ {"current_steps": 55, "total_steps": 148, "loss": 0.5021, "learning_rate": 9.365070565805941e-06, "epoch": 0.37068239258635216, "percentage": 37.16, "elapsed_time": "0:29:18", "remaining_time": "0:49:33", "throughput": "0.00", "total_tokens": 0}
59
+ {"current_steps": 56, "total_steps": 148, "loss": 0.5008, "learning_rate": 9.294316336102132e-06, "epoch": 0.37742207245155857, "percentage": 37.84, "elapsed_time": "0:29:42", "remaining_time": "0:48:48", "throughput": "0.00", "total_tokens": 0}
60
+ {"current_steps": 57, "total_steps": 148, "loss": 0.5, "learning_rate": 9.220122420149753e-06, "epoch": 0.38416175231676497, "percentage": 38.51, "elapsed_time": "0:30:05", "remaining_time": "0:48:02", "throughput": "0.00", "total_tokens": 0}
61
+ {"current_steps": 58, "total_steps": 148, "loss": 0.5016, "learning_rate": 9.142548246219212e-06, "epoch": 0.3909014321819714, "percentage": 39.19, "elapsed_time": "0:30:29", "remaining_time": "0:47:19", "throughput": "0.00", "total_tokens": 0}
62
+ {"current_steps": 59, "total_steps": 148, "loss": 0.5015, "learning_rate": 9.06165595011943e-06, "epoch": 0.3976411120471778, "percentage": 39.86, "elapsed_time": "0:30:54", "remaining_time": "0:46:37", "throughput": "0.00", "total_tokens": 0}
63
+ {"current_steps": 60, "total_steps": 148, "loss": 0.4998, "learning_rate": 8.97751032542795e-06, "epoch": 0.4043807919123842, "percentage": 40.54, "elapsed_time": "0:31:20", "remaining_time": "0:45:57", "throughput": "0.00", "total_tokens": 0}
64
+ {"current_steps": 61, "total_steps": 148, "loss": 0.5007, "learning_rate": 8.890178771592198e-06, "epoch": 0.4111204717775906, "percentage": 41.22, "elapsed_time": "0:31:47", "remaining_time": "0:45:20", "throughput": "0.00", "total_tokens": 0}
65
+ {"current_steps": 62, "total_steps": 148, "loss": 0.5016, "learning_rate": 8.799731239943488e-06, "epoch": 0.41786015164279694, "percentage": 41.89, "elapsed_time": "0:32:11", "remaining_time": "0:44:39", "throughput": "0.00", "total_tokens": 0}
66
+ {"current_steps": 63, "total_steps": 148, "loss": 0.5058, "learning_rate": 8.706240177667003e-06, "epoch": 0.42459983150800334, "percentage": 42.57, "elapsed_time": "0:32:36", "remaining_time": "0:43:59", "throughput": "0.00", "total_tokens": 0}
67
+ {"current_steps": 64, "total_steps": 148, "loss": 0.5021, "learning_rate": 8.609780469772623e-06, "epoch": 0.43133951137320975, "percentage": 43.24, "elapsed_time": "0:33:01", "remaining_time": "0:43:20", "throughput": "0.00", "total_tokens": 0}
68
+ {"current_steps": 64, "total_steps": 148, "eval_loss": 0.5015159249305725, "epoch": 0.43133951137320975, "percentage": 43.24, "elapsed_time": "0:34:58", "remaining_time": "0:45:53", "throughput": "0.00", "total_tokens": 0}
69
+ {"current_steps": 65, "total_steps": 148, "loss": 0.5008, "learning_rate": 8.510429379113114e-06, "epoch": 0.43807919123841615, "percentage": 43.92, "elapsed_time": "0:35:25", "remaining_time": "0:45:13", "throughput": "0.00", "total_tokens": 0}
70
+ {"current_steps": 66, "total_steps": 148, "loss": 0.4999, "learning_rate": 8.408266484497664e-06, "epoch": 0.44481887110362256, "percentage": 44.59, "elapsed_time": "0:35:50", "remaining_time": "0:44:31", "throughput": "0.00", "total_tokens": 0}
71
+ {"current_steps": 67, "total_steps": 148, "loss": 0.5002, "learning_rate": 8.303373616950408e-06, "epoch": 0.45155855096882896, "percentage": 45.27, "elapsed_time": "0:36:13", "remaining_time": "0:43:47", "throughput": "0.00", "total_tokens": 0}
72
+ {"current_steps": 68, "total_steps": 148, "loss": 0.501, "learning_rate": 8.195834794164925e-06, "epoch": 0.45829823083403537, "percentage": 45.95, "elapsed_time": "0:36:40", "remaining_time": "0:43:08", "throughput": "0.00", "total_tokens": 0}
73
+ {"current_steps": 69, "total_steps": 148, "loss": 0.5007, "learning_rate": 8.085736153207277e-06, "epoch": 0.4650379106992418, "percentage": 46.62, "elapsed_time": "0:37:05", "remaining_time": "0:42:27", "throughput": "0.00", "total_tokens": 0}
74
+ {"current_steps": 70, "total_steps": 148, "loss": 0.5003, "learning_rate": 7.973165881521435e-06, "epoch": 0.4717775905644482, "percentage": 47.3, "elapsed_time": "0:37:28", "remaining_time": "0:41:45", "throughput": "0.00", "total_tokens": 0}
75
+ {"current_steps": 71, "total_steps": 148, "loss": 0.5001, "learning_rate": 7.858214146292394e-06, "epoch": 0.4785172704296546, "percentage": 47.97, "elapsed_time": "0:37:58", "remaining_time": "0:41:10", "throughput": "0.00", "total_tokens": 0}
76
+ {"current_steps": 72, "total_steps": 148, "loss": 0.5002, "learning_rate": 7.74097302222355e-06, "epoch": 0.485256950294861, "percentage": 48.65, "elapsed_time": "0:38:21", "remaining_time": "0:40:29", "throughput": "0.00", "total_tokens": 0}
77
+ {"current_steps": 73, "total_steps": 148, "loss": 0.5004, "learning_rate": 7.621536417786159e-06, "epoch": 0.4919966301600674, "percentage": 49.32, "elapsed_time": "0:38:49", "remaining_time": "0:39:53", "throughput": "0.00", "total_tokens": 0}
78
+ {"current_steps": 74, "total_steps": 148, "loss": 0.5006, "learning_rate": 7.500000000000001e-06, "epoch": 0.4987363100252738, "percentage": 50.0, "elapsed_time": "0:39:15", "remaining_time": "0:39:15", "throughput": "0.00", "total_tokens": 0}
79
+ {"current_steps": 75, "total_steps": 148, "loss": 0.501, "learning_rate": 7.37646111780545e-06, "epoch": 0.5054759898904801, "percentage": 50.68, "elapsed_time": "0:39:41", "remaining_time": "0:38:37", "throughput": "0.00", "total_tokens": 0}
80
+ {"current_steps": 76, "total_steps": 148, "loss": 0.5005, "learning_rate": 7.251018724088367e-06, "epoch": 0.5122156697556866, "percentage": 51.35, "elapsed_time": "0:40:07", "remaining_time": "0:38:01", "throughput": "0.00", "total_tokens": 0}
81
+ {"current_steps": 77, "total_steps": 148, "loss": 0.4998, "learning_rate": 7.12377329642024e-06, "epoch": 0.518955349620893, "percentage": 52.03, "elapsed_time": "0:40:34", "remaining_time": "0:37:25", "throughput": "0.00", "total_tokens": 0}
82
+ {"current_steps": 78, "total_steps": 148, "loss": 0.5005, "learning_rate": 6.994826756577082e-06, "epoch": 0.5256950294860994, "percentage": 52.7, "elapsed_time": "0:40:59", "remaining_time": "0:36:47", "throughput": "0.00", "total_tokens": 0}
83
+ {"current_steps": 79, "total_steps": 148, "loss": 0.5, "learning_rate": 6.864282388901544e-06, "epoch": 0.5324347093513058, "percentage": 53.38, "elapsed_time": "0:41:26", "remaining_time": "0:36:12", "throughput": "0.00", "total_tokens": 0}
84
+ {"current_steps": 80, "total_steps": 148, "loss": 0.5017, "learning_rate": 6.732244757573619e-06, "epoch": 0.5391743892165122, "percentage": 54.05, "elapsed_time": "0:41:50", "remaining_time": "0:35:34", "throughput": "0.00", "total_tokens": 0}
85
+ {"current_steps": 80, "total_steps": 148, "eval_loss": 0.501190721988678, "epoch": 0.5391743892165122, "percentage": 54.05, "elapsed_time": "0:43:47", "remaining_time": "0:37:13", "throughput": "0.00", "total_tokens": 0}
86
+ {"current_steps": 81, "total_steps": 148, "loss": 0.5024, "learning_rate": 6.598819622856227e-06, "epoch": 0.5459140690817186, "percentage": 54.73, "elapsed_time": "0:44:15", "remaining_time": "0:36:36", "throughput": "0.00", "total_tokens": 0}
87
+ {"current_steps": 82, "total_steps": 148, "loss": 0.5005, "learning_rate": 6.464113856382752e-06, "epoch": 0.552653748946925, "percentage": 55.41, "elapsed_time": "0:44:39", "remaining_time": "0:35:56", "throughput": "0.00", "total_tokens": 0}
88
+ {"current_steps": 83, "total_steps": 148, "loss": 0.5003, "learning_rate": 6.328235355554382e-06, "epoch": 0.5593934288121314, "percentage": 56.08, "elapsed_time": "0:45:07", "remaining_time": "0:35:20", "throughput": "0.00", "total_tokens": 0}
89
+ {"current_steps": 84, "total_steps": 148, "loss": 0.5017, "learning_rate": 6.191292957115825e-06, "epoch": 0.5661331086773378, "percentage": 56.76, "elapsed_time": "0:45:33", "remaining_time": "0:34:42", "throughput": "0.00", "total_tokens": 0}
90
+ {"current_steps": 85, "total_steps": 148, "loss": 0.5021, "learning_rate": 6.053396349978632e-06, "epoch": 0.5728727885425442, "percentage": 57.43, "elapsed_time": "0:45:59", "remaining_time": "0:34:05", "throughput": "0.00", "total_tokens": 0}
91
+ {"current_steps": 86, "total_steps": 148, "loss": 0.5013, "learning_rate": 5.914655987361934e-06, "epoch": 0.5796124684077506, "percentage": 58.11, "elapsed_time": "0:46:25", "remaining_time": "0:33:28", "throughput": "0.00", "total_tokens": 0}
92
+ {"current_steps": 87, "total_steps": 148, "loss": 0.5004, "learning_rate": 5.77518299832099e-06, "epoch": 0.586352148272957, "percentage": 58.78, "elapsed_time": "0:46:51", "remaining_time": "0:32:50", "throughput": "0.00", "total_tokens": 0}
93
+ {"current_steps": 88, "total_steps": 148, "loss": 0.5013, "learning_rate": 5.635089098734394e-06, "epoch": 0.5930918281381634, "percentage": 59.46, "elapsed_time": "0:47:16", "remaining_time": "0:32:13", "throughput": "0.00", "total_tokens": 0}
94
+ {"current_steps": 89, "total_steps": 148, "loss": 0.4999, "learning_rate": 5.49448650182125e-06, "epoch": 0.5998315080033698, "percentage": 60.14, "elapsed_time": "0:47:42", "remaining_time": "0:31:37", "throughput": "0.00", "total_tokens": 0}
95
+ {"current_steps": 90, "total_steps": 148, "loss": 0.5011, "learning_rate": 5.353487828259973e-06, "epoch": 0.6065711878685762, "percentage": 60.81, "elapsed_time": "0:48:07", "remaining_time": "0:31:00", "throughput": "0.00", "total_tokens": 0}
96
+ {"current_steps": 91, "total_steps": 148, "loss": 0.5011, "learning_rate": 5.212206015980742e-06, "epoch": 0.6133108677337826, "percentage": 61.49, "elapsed_time": "0:48:31", "remaining_time": "0:30:23", "throughput": "0.00", "total_tokens": 0}
97
+ {"current_steps": 92, "total_steps": 148, "loss": 0.5009, "learning_rate": 5.070754229703811e-06, "epoch": 0.620050547598989, "percentage": 62.16, "elapsed_time": "0:48:55", "remaining_time": "0:29:46", "throughput": "0.00", "total_tokens": 0}
98
+ {"current_steps": 93, "total_steps": 148, "loss": 0.5016, "learning_rate": 4.929245770296191e-06, "epoch": 0.6267902274641954, "percentage": 62.84, "elapsed_time": "0:49:22", "remaining_time": "0:29:12", "throughput": "0.00", "total_tokens": 0}
99
+ {"current_steps": 94, "total_steps": 148, "loss": 0.5015, "learning_rate": 4.78779398401926e-06, "epoch": 0.6335299073294018, "percentage": 63.51, "elapsed_time": "0:49:47", "remaining_time": "0:28:36", "throughput": "0.00", "total_tokens": 0}
100
+ {"current_steps": 95, "total_steps": 148, "loss": 0.5016, "learning_rate": 4.646512171740028e-06, "epoch": 0.6402695871946082, "percentage": 64.19, "elapsed_time": "0:50:13", "remaining_time": "0:28:01", "throughput": "0.00", "total_tokens": 0}
101
+ {"current_steps": 96, "total_steps": 148, "loss": 0.5003, "learning_rate": 4.505513498178752e-06, "epoch": 0.6470092670598147, "percentage": 64.86, "elapsed_time": "0:50:38", "remaining_time": "0:27:25", "throughput": "0.00", "total_tokens": 0}
102
+ {"current_steps": 96, "total_steps": 148, "eval_loss": 0.5007099509239197, "epoch": 0.6470092670598147, "percentage": 64.86, "elapsed_time": "0:52:35", "remaining_time": "0:28:29", "throughput": "0.00", "total_tokens": 0}
103
+ {"current_steps": 97, "total_steps": 148, "loss": 0.5012, "learning_rate": 4.364910901265607e-06, "epoch": 0.6537489469250211, "percentage": 65.54, "elapsed_time": "0:53:01", "remaining_time": "0:27:52", "throughput": "0.00", "total_tokens": 0}
104
+ {"current_steps": 98, "total_steps": 148, "loss": 0.504, "learning_rate": 4.224817001679011e-06, "epoch": 0.6604886267902275, "percentage": 66.22, "elapsed_time": "0:53:27", "remaining_time": "0:27:16", "throughput": "0.00", "total_tokens": 0}
105
+ {"current_steps": 99, "total_steps": 148, "loss": 0.4996, "learning_rate": 4.085344012638067e-06, "epoch": 0.6672283066554339, "percentage": 66.89, "elapsed_time": "0:53:50", "remaining_time": "0:26:39", "throughput": "0.00", "total_tokens": 0}
106
+ {"current_steps": 100, "total_steps": 148, "loss": 0.5039, "learning_rate": 3.94660365002137e-06, "epoch": 0.6739679865206403, "percentage": 67.57, "elapsed_time": "0:54:13", "remaining_time": "0:26:01", "throughput": "0.00", "total_tokens": 0}
107
+ {"current_steps": 101, "total_steps": 148, "loss": 0.5008, "learning_rate": 3.808707042884176e-06, "epoch": 0.6807076663858467, "percentage": 68.24, "elapsed_time": "0:54:38", "remaining_time": "0:25:25", "throughput": "0.00", "total_tokens": 0}
108
+ {"current_steps": 102, "total_steps": 148, "loss": 0.5003, "learning_rate": 3.6717646444456196e-06, "epoch": 0.6874473462510531, "percentage": 68.92, "elapsed_time": "0:55:03", "remaining_time": "0:24:49", "throughput": "0.00", "total_tokens": 0}
109
+ {"current_steps": 103, "total_steps": 148, "loss": 0.5002, "learning_rate": 3.5358861436172487e-06, "epoch": 0.6941870261162595, "percentage": 69.59, "elapsed_time": "0:55:28", "remaining_time": "0:24:14", "throughput": "0.00", "total_tokens": 0}
110
+ {"current_steps": 104, "total_steps": 148, "loss": 0.5003, "learning_rate": 3.401180377143774e-06, "epoch": 0.7009267059814659, "percentage": 70.27, "elapsed_time": "0:55:56", "remaining_time": "0:23:39", "throughput": "0.00", "total_tokens": 0}
111
+ {"current_steps": 105, "total_steps": 148, "loss": 0.5012, "learning_rate": 3.2677552424263836e-06, "epoch": 0.7076663858466723, "percentage": 70.95, "elapsed_time": "0:56:24", "remaining_time": "0:23:06", "throughput": "0.00", "total_tokens": 0}
112
+ {"current_steps": 106, "total_steps": 148, "loss": 0.5007, "learning_rate": 3.1357176110984578e-06, "epoch": 0.7144060657118787, "percentage": 71.62, "elapsed_time": "0:56:50", "remaining_time": "0:22:31", "throughput": "0.00", "total_tokens": 0}
113
+ {"current_steps": 107, "total_steps": 148, "loss": 0.5003, "learning_rate": 3.0051732434229185e-06, "epoch": 0.7211457455770851, "percentage": 72.3, "elapsed_time": "0:57:16", "remaining_time": "0:21:56", "throughput": "0.00", "total_tokens": 0}
114
+ {"current_steps": 108, "total_steps": 148, "loss": 0.4988, "learning_rate": 2.8762267035797607e-06, "epoch": 0.7278854254422915, "percentage": 72.97, "elapsed_time": "0:57:41", "remaining_time": "0:21:22", "throughput": "0.00", "total_tokens": 0}
115
+ {"current_steps": 109, "total_steps": 148, "loss": 0.4998, "learning_rate": 2.748981275911633e-06, "epoch": 0.7346251053074979, "percentage": 73.65, "elapsed_time": "0:58:05", "remaining_time": "0:20:46", "throughput": "0.00", "total_tokens": 0}
116
+ {"current_steps": 110, "total_steps": 148, "loss": 0.5013, "learning_rate": 2.6235388821945497e-06, "epoch": 0.7413647851727043, "percentage": 74.32, "elapsed_time": "0:58:28", "remaining_time": "0:20:11", "throughput": "0.00", "total_tokens": 0}
117
+ {"current_steps": 111, "total_steps": 148, "loss": 0.5006, "learning_rate": 2.5000000000000015e-06, "epoch": 0.7481044650379107, "percentage": 75.0, "elapsed_time": "0:58:53", "remaining_time": "0:19:37", "throughput": "0.00", "total_tokens": 0}
118
+ {"current_steps": 112, "total_steps": 148, "loss": 0.4996, "learning_rate": 2.3784635822138424e-06, "epoch": 0.7548441449031171, "percentage": 75.68, "elapsed_time": "0:59:19", "remaining_time": "0:19:04", "throughput": "0.00", "total_tokens": 0}
119
+ {"current_steps": 112, "total_steps": 148, "eval_loss": 0.5012484788894653, "epoch": 0.7548441449031171, "percentage": 75.68, "elapsed_time": "1:01:16", "remaining_time": "0:19:41", "throughput": "0.00", "total_tokens": 0}
120
+ {"current_steps": 113, "total_steps": 148, "loss": 0.5014, "learning_rate": 2.2590269777764516e-06, "epoch": 0.7615838247683235, "percentage": 76.35, "elapsed_time": "1:01:40", "remaining_time": "0:19:06", "throughput": "0.00", "total_tokens": 0}
121
+ {"current_steps": 114, "total_steps": 148, "loss": 0.5009, "learning_rate": 2.141785853707607e-06, "epoch": 0.7683235046335299, "percentage": 77.03, "elapsed_time": "1:02:04", "remaining_time": "0:18:30", "throughput": "0.00", "total_tokens": 0}
122
+ {"current_steps": 115, "total_steps": 148, "loss": 0.5002, "learning_rate": 2.0268341184785674e-06, "epoch": 0.7750631844987363, "percentage": 77.7, "elapsed_time": "1:02:33", "remaining_time": "0:17:57", "throughput": "0.00", "total_tokens": 0}
123
+ {"current_steps": 116, "total_steps": 148, "loss": 0.4998, "learning_rate": 1.9142638467927254e-06, "epoch": 0.7818028643639428, "percentage": 78.38, "elapsed_time": "1:03:00", "remaining_time": "0:17:22", "throughput": "0.00", "total_tokens": 0}
124
+ {"current_steps": 117, "total_steps": 148, "loss": 0.5042, "learning_rate": 1.8041652058350768e-06, "epoch": 0.7885425442291492, "percentage": 79.05, "elapsed_time": "1:03:26", "remaining_time": "0:16:48", "throughput": "0.00", "total_tokens": 0}
125
+ {"current_steps": 118, "total_steps": 148, "loss": 0.5008, "learning_rate": 1.6966263830495939e-06, "epoch": 0.7952822240943556, "percentage": 79.73, "elapsed_time": "1:03:49", "remaining_time": "0:16:13", "throughput": "0.00", "total_tokens": 0}
126
+ {"current_steps": 119, "total_steps": 148, "loss": 0.5, "learning_rate": 1.5917335155023368e-06, "epoch": 0.802021903959562, "percentage": 80.41, "elapsed_time": "1:04:16", "remaining_time": "0:15:39", "throughput": "0.00", "total_tokens": 0}
127
+ {"current_steps": 120, "total_steps": 148, "loss": 0.4998, "learning_rate": 1.4895706208868876e-06, "epoch": 0.8087615838247684, "percentage": 81.08, "elapsed_time": "1:04:41", "remaining_time": "0:15:05", "throughput": "0.00", "total_tokens": 0}
128
+ {"current_steps": 121, "total_steps": 148, "loss": 0.5009, "learning_rate": 1.390219530227378e-06, "epoch": 0.8155012636899748, "percentage": 81.76, "elapsed_time": "1:05:07", "remaining_time": "0:14:31", "throughput": "0.00", "total_tokens": 0}
129
+ {"current_steps": 122, "total_steps": 148, "loss": 0.5004, "learning_rate": 1.2937598223330006e-06, "epoch": 0.8222409435551812, "percentage": 82.43, "elapsed_time": "1:05:32", "remaining_time": "0:13:58", "throughput": "0.00", "total_tokens": 0}
130
+ {"current_steps": 123, "total_steps": 148, "loss": 0.5006, "learning_rate": 1.2002687600565138e-06, "epoch": 0.8289806234203876, "percentage": 83.11, "elapsed_time": "1:05:59", "remaining_time": "0:13:24", "throughput": "0.00", "total_tokens": 0}
131
+ {"current_steps": 124, "total_steps": 148, "loss": 0.5013, "learning_rate": 1.1098212284078037e-06, "epoch": 0.8357203032855939, "percentage": 83.78, "elapsed_time": "1:06:24", "remaining_time": "0:12:51", "throughput": "0.00", "total_tokens": 0}
132
+ {"current_steps": 125, "total_steps": 148, "loss": 0.4999, "learning_rate": 1.0224896745720513e-06, "epoch": 0.8424599831508003, "percentage": 84.46, "elapsed_time": "1:06:51", "remaining_time": "0:12:18", "throughput": "0.00", "total_tokens": 0}
133
+ {"current_steps": 126, "total_steps": 148, "loss": 0.4997, "learning_rate": 9.383440498805712e-07, "epoch": 0.8491996630160067, "percentage": 85.14, "elapsed_time": "1:07:16", "remaining_time": "0:11:44", "throughput": "0.00", "total_tokens": 0}
134
+ {"current_steps": 127, "total_steps": 148, "loss": 0.4999, "learning_rate": 8.574517537807897e-07, "epoch": 0.8559393428812131, "percentage": 85.81, "elapsed_time": "1:07:41", "remaining_time": "0:11:11", "throughput": "0.00", "total_tokens": 0}
135
+ {"current_steps": 128, "total_steps": 148, "loss": 0.5077, "learning_rate": 7.798775798502484e-07, "epoch": 0.8626790227464195, "percentage": 86.49, "elapsed_time": "1:08:07", "remaining_time": "0:10:38", "throughput": "0.00", "total_tokens": 0}
136
+ {"current_steps": 128, "total_steps": 148, "eval_loss": 0.5004793405532837, "epoch": 0.8626790227464195, "percentage": 86.49, "elapsed_time": "1:10:04", "remaining_time": "0:10:57", "throughput": "0.00", "total_tokens": 0}
137
+ {"current_steps": 129, "total_steps": 148, "loss": 0.5014, "learning_rate": 7.056836638978698e-07, "epoch": 0.8694187026116259, "percentage": 87.16, "elapsed_time": "1:10:33", "remaining_time": "0:10:23", "throughput": "0.00", "total_tokens": 0}
138
+ {"current_steps": 130, "total_steps": 148, "loss": 0.5008, "learning_rate": 6.349294341940593e-07, "epoch": 0.8761583824768323, "percentage": 87.84, "elapsed_time": "1:10:59", "remaining_time": "0:09:49", "throughput": "0.00", "total_tokens": 0}
139
+ {"current_steps": 131, "total_steps": 148, "loss": 0.5006, "learning_rate": 5.676715638695063e-07, "epoch": 0.8828980623420387, "percentage": 88.51, "elapsed_time": "1:11:23", "remaining_time": "0:09:15", "throughput": "0.00", "total_tokens": 0}
140
+ {"current_steps": 132, "total_steps": 148, "loss": 0.5004, "learning_rate": 5.039639255208156e-07, "epoch": 0.8896377422072451, "percentage": 89.19, "elapsed_time": "1:11:46", "remaining_time": "0:08:41", "throughput": "0.00", "total_tokens": 0}
141
+ {"current_steps": 133, "total_steps": 148, "loss": 0.5009, "learning_rate": 4.43857548059321e-07, "epoch": 0.8963774220724515, "percentage": 89.86, "elapsed_time": "1:12:11", "remaining_time": "0:08:08", "throughput": "0.00", "total_tokens": 0}
142
+ {"current_steps": 134, "total_steps": 148, "loss": 0.5005, "learning_rate": 3.87400575837657e-07, "epoch": 0.9031171019376579, "percentage": 90.54, "elapsed_time": "1:12:35", "remaining_time": "0:07:35", "throughput": "0.00", "total_tokens": 0}
143
+ {"current_steps": 135, "total_steps": 148, "loss": 0.501, "learning_rate": 3.346382300868134e-07, "epoch": 0.9098567818028643, "percentage": 91.22, "elapsed_time": "1:12:59", "remaining_time": "0:07:01", "throughput": "0.00", "total_tokens": 0}
144
+ {"current_steps": 136, "total_steps": 148, "loss": 0.5, "learning_rate": 2.85612772694579e-07, "epoch": 0.9165964616680707, "percentage": 91.89, "elapsed_time": "1:13:24", "remaining_time": "0:06:28", "throughput": "0.00", "total_tokens": 0}
145
+ {"current_steps": 137, "total_steps": 148, "loss": 0.5028, "learning_rate": 2.403634723543674e-07, "epoch": 0.9233361415332771, "percentage": 92.57, "elapsed_time": "1:13:48", "remaining_time": "0:05:55", "throughput": "0.00", "total_tokens": 0}
146
+ {"current_steps": 138, "total_steps": 148, "loss": 0.5002, "learning_rate": 1.989265731115525e-07, "epoch": 0.9300758213984835, "percentage": 93.24, "elapsed_time": "1:14:11", "remaining_time": "0:05:22", "throughput": "0.00", "total_tokens": 0}
147
+ {"current_steps": 139, "total_steps": 148, "loss": 0.4988, "learning_rate": 1.6133526533250566e-07, "epoch": 0.93681550126369, "percentage": 93.92, "elapsed_time": "1:14:36", "remaining_time": "0:04:49", "throughput": "0.00", "total_tokens": 0}
148
+ {"current_steps": 140, "total_steps": 148, "loss": 0.5017, "learning_rate": 1.2761965911958385e-07, "epoch": 0.9435551811288964, "percentage": 94.59, "elapsed_time": "1:15:02", "remaining_time": "0:04:17", "throughput": "0.00", "total_tokens": 0}
149
+ {"current_steps": 141, "total_steps": 148, "loss": 0.4998, "learning_rate": 9.780676019336632e-08, "epoch": 0.9502948609941028, "percentage": 95.27, "elapsed_time": "1:15:27", "remaining_time": "0:03:44", "throughput": "0.00", "total_tokens": 0}
150
+ {"current_steps": 142, "total_steps": 148, "loss": 0.4999, "learning_rate": 7.192044826145772e-08, "epoch": 0.9570345408593092, "percentage": 95.95, "elapsed_time": "1:15:53", "remaining_time": "0:03:12", "throughput": "0.00", "total_tokens": 0}
151
+ {"current_steps": 143, "total_steps": 148, "loss": 0.5005, "learning_rate": 4.998145789118114e-08, "epoch": 0.9637742207245156, "percentage": 96.62, "elapsed_time": "1:16:20", "remaining_time": "0:02:40", "throughput": "0.00", "total_tokens": 0}
152
+ {"current_steps": 144, "total_steps": 148, "loss": 0.5012, "learning_rate": 3.2007361901485455e-08, "epoch": 0.970513900589722, "percentage": 97.3, "elapsed_time": "1:16:47", "remaining_time": "0:02:07", "throughput": "0.00", "total_tokens": 0}
153
+ {"current_steps": 144, "total_steps": 148, "eval_loss": 0.5004004240036011, "epoch": 0.970513900589722, "percentage": 97.3, "elapsed_time": "1:18:44", "remaining_time": "0:02:11", "throughput": "0.00", "total_tokens": 0}
154
+ {"current_steps": 145, "total_steps": 148, "loss": 0.4988, "learning_rate": 1.8012557287367394e-08, "epoch": 0.9772535804549284, "percentage": 97.97, "elapsed_time": "1:19:11", "remaining_time": "0:01:38", "throughput": "0.00", "total_tokens": 0}
155
+ {"current_steps": 146, "total_steps": 148, "loss": 0.5013, "learning_rate": 8.008253688084888e-09, "epoch": 0.9839932603201348, "percentage": 98.65, "elapsed_time": "1:19:36", "remaining_time": "0:01:05", "throughput": "0.00", "total_tokens": 0}
156
+ {"current_steps": 147, "total_steps": 148, "loss": 0.5021, "learning_rate": 2.002464408392135e-09, "epoch": 0.9907329401853412, "percentage": 99.32, "elapsed_time": "1:20:02", "remaining_time": "0:00:32", "throughput": "0.00", "total_tokens": 0}
157
+ {"current_steps": 148, "total_steps": 148, "loss": 0.5016, "learning_rate": 0.0, "epoch": 0.9974726200505476, "percentage": 100.0, "elapsed_time": "1:20:29", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
158
+ {"current_steps": 148, "total_steps": 148, "epoch": 0.9974726200505476, "percentage": 100.0, "elapsed_time": "1:20:33", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
trainer_state.json ADDED
@@ -0,0 +1,2092 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9974726200505476,
5
+ "eval_steps": 16,
6
+ "global_step": 148,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.006739679865206402,
13
+ "grad_norm": 0.639816164970398,
14
+ "kl": 0.0,
15
+ "learning_rate": 2.702702702702703e-07,
16
+ "logps/chosen": -12.887619018554688,
17
+ "logps/rejected": -14.09291500515408,
18
+ "loss": 0.5,
19
+ "rewards/chosen": 0.0,
20
+ "rewards/margins": 0.0,
21
+ "rewards/rejected": 0.0,
22
+ "step": 1
23
+ },
24
+ {
25
+ "epoch": 0.013479359730412805,
26
+ "grad_norm": 0.9776865839958191,
27
+ "kl": 0.0,
28
+ "learning_rate": 5.405405405405406e-07,
29
+ "logps/chosen": -12.487646663890166,
30
+ "logps/rejected": -13.836714680989584,
31
+ "loss": 0.5,
32
+ "rewards/chosen": 0.0,
33
+ "rewards/margins": 0.0,
34
+ "rewards/rejected": 0.0,
35
+ "step": 2
36
+ },
37
+ {
38
+ "epoch": 0.020219039595619208,
39
+ "grad_norm": 1.2699075937271118,
40
+ "kl": 0.0047824084758758545,
41
+ "learning_rate": 8.108108108108109e-07,
42
+ "logps/chosen": -10.518890380859375,
43
+ "logps/rejected": -14.338980538504464,
44
+ "loss": 0.5056,
45
+ "rewards/chosen": -0.0008002470151103776,
46
+ "rewards/margins": -0.0007839773286115653,
47
+ "rewards/rejected": -1.6269686498812268e-05,
48
+ "step": 3
49
+ },
50
+ {
51
+ "epoch": 0.02695871946082561,
52
+ "grad_norm": 0.8817349076271057,
53
+ "kl": 0.0018611401319503784,
54
+ "learning_rate": 1.0810810810810812e-06,
55
+ "logps/chosen": -12.266104305491728,
56
+ "logps/rejected": -13.822291056315104,
57
+ "loss": 0.503,
58
+ "rewards/chosen": 0.00035604768816162557,
59
+ "rewards/margins": 0.00037912309087593767,
60
+ "rewards/rejected": -2.3075402714312078e-05,
61
+ "step": 4
62
+ },
63
+ {
64
+ "epoch": 0.03369839932603201,
65
+ "grad_norm": 1.0762742757797241,
66
+ "kl": 0.002072639763355255,
67
+ "learning_rate": 1.3513513513513515e-06,
68
+ "logps/chosen": -12.169254503752056,
69
+ "logps/rejected": -14.365091177133413,
70
+ "loss": 0.5045,
71
+ "rewards/chosen": -6.65488861207115e-05,
72
+ "rewards/margins": -0.00010910112201518979,
73
+ "rewards/rejected": 4.255223589447828e-05,
74
+ "step": 5
75
+ },
76
+ {
77
+ "epoch": 0.040438079191238416,
78
+ "grad_norm": 0.9145295023918152,
79
+ "kl": 0.007123664021492004,
80
+ "learning_rate": 1.6216216216216219e-06,
81
+ "logps/chosen": -11.956428773941532,
82
+ "logps/rejected": -15.07843202533144,
83
+ "loss": 0.5043,
84
+ "rewards/chosen": -9.064829998439358e-05,
85
+ "rewards/margins": -0.0003112042532143122,
86
+ "rewards/rejected": 0.00022055595322991863,
87
+ "step": 6
88
+ },
89
+ {
90
+ "epoch": 0.04717775905644482,
91
+ "grad_norm": 1.0839142799377441,
92
+ "kl": 0.0074616968631744385,
93
+ "learning_rate": 1.8918918918918922e-06,
94
+ "logps/chosen": -11.537665473090279,
95
+ "logps/rejected": -15.229617527553014,
96
+ "loss": 0.5044,
97
+ "rewards/chosen": -5.648369228260385e-05,
98
+ "rewards/margins": 0.0015003369430020925,
99
+ "rewards/rejected": -0.0015568206352846964,
100
+ "step": 7
101
+ },
102
+ {
103
+ "epoch": 0.05391743892165122,
104
+ "grad_norm": 1.325909972190857,
105
+ "kl": 0.00619843602180481,
106
+ "learning_rate": 2.1621621621621623e-06,
107
+ "logps/chosen": -12.332542017886514,
108
+ "logps/rejected": -15.343857985276442,
109
+ "loss": 0.5046,
110
+ "rewards/chosen": 0.00017708414969475647,
111
+ "rewards/margins": 7.641464490669702e-05,
112
+ "rewards/rejected": 0.00010066950478805945,
113
+ "step": 8
114
+ },
115
+ {
116
+ "epoch": 0.060657118786857624,
117
+ "grad_norm": 1.1749920845031738,
118
+ "kl": 0.008414536714553833,
119
+ "learning_rate": 2.432432432432433e-06,
120
+ "logps/chosen": -10.904085356613685,
121
+ "logps/rejected": -15.790567452566965,
122
+ "loss": 0.5041,
123
+ "rewards/chosen": -2.645625500008464e-05,
124
+ "rewards/margins": 0.00024653847296057004,
125
+ "rewards/rejected": -0.0002729947279606547,
126
+ "step": 9
127
+ },
128
+ {
129
+ "epoch": 0.06739679865206402,
130
+ "grad_norm": 1.0879679918289185,
131
+ "kl": 0.00559939444065094,
132
+ "learning_rate": 2.702702702702703e-06,
133
+ "logps/chosen": -12.677191734313965,
134
+ "logps/rejected": -16.435983657836914,
135
+ "loss": 0.5039,
136
+ "rewards/chosen": 0.00023014770704321563,
137
+ "rewards/margins": 0.0006161145865917206,
138
+ "rewards/rejected": -0.00038596687954850495,
139
+ "step": 10
140
+ },
141
+ {
142
+ "epoch": 0.07413647851727043,
143
+ "grad_norm": 0.8410467505455017,
144
+ "kl": 0.008653655648231506,
145
+ "learning_rate": 2.9729729729729736e-06,
146
+ "logps/chosen": -12.12862807053786,
147
+ "logps/rejected": -14.626085783305921,
148
+ "loss": 0.5025,
149
+ "rewards/chosen": 0.00037280140587916743,
150
+ "rewards/margins": 0.0007017212071520115,
151
+ "rewards/rejected": -0.00032891980127284403,
152
+ "step": 11
153
+ },
154
+ {
155
+ "epoch": 0.08087615838247683,
156
+ "grad_norm": 1.1322226524353027,
157
+ "kl": 0.004855245351791382,
158
+ "learning_rate": 3.2432432432432437e-06,
159
+ "logps/chosen": -12.374617682562935,
160
+ "logps/rejected": -13.656366620744977,
161
+ "loss": 0.5031,
162
+ "rewards/chosen": 0.0005280521905256642,
163
+ "rewards/margins": 0.0008345128310519079,
164
+ "rewards/rejected": -0.00030646064052624363,
165
+ "step": 12
166
+ },
167
+ {
168
+ "epoch": 0.08761583824768324,
169
+ "grad_norm": 0.9295361042022705,
170
+ "kl": 0.005203694105148315,
171
+ "learning_rate": 3.513513513513514e-06,
172
+ "logps/chosen": -11.762645945829505,
173
+ "logps/rejected": -15.171628824869792,
174
+ "loss": 0.5036,
175
+ "rewards/chosen": 0.000179896637906923,
176
+ "rewards/margins": -0.00038859495509634996,
177
+ "rewards/rejected": 0.000568491593003273,
178
+ "step": 13
179
+ },
180
+ {
181
+ "epoch": 0.09435551811288964,
182
+ "grad_norm": 0.8818777203559875,
183
+ "kl": 0.005161814391613007,
184
+ "learning_rate": 3.7837837837837844e-06,
185
+ "logps/chosen": -12.79319673426011,
186
+ "logps/rejected": -14.996563720703126,
187
+ "loss": 0.5027,
188
+ "rewards/chosen": 0.0002456202226526597,
189
+ "rewards/margins": -3.756979553430688e-05,
190
+ "rewards/rejected": 0.0002831900181869666,
191
+ "step": 14
192
+ },
193
+ {
194
+ "epoch": 0.10109519797809605,
195
+ "grad_norm": 0.9348131418228149,
196
+ "kl": 0.007587306201457977,
197
+ "learning_rate": 4.0540540540540545e-06,
198
+ "logps/chosen": -12.363379276160037,
199
+ "logps/rejected": -14.615665558845766,
200
+ "loss": 0.5047,
201
+ "rewards/chosen": 0.0002821806366696502,
202
+ "rewards/margins": -3.7112612848990366e-05,
203
+ "rewards/rejected": 0.00031929324951864057,
204
+ "step": 15
205
+ },
206
+ {
207
+ "epoch": 0.10783487784330244,
208
+ "grad_norm": 0.9292178153991699,
209
+ "kl": 0.006865538656711578,
210
+ "learning_rate": 4.324324324324325e-06,
211
+ "logps/chosen": -12.438332112630208,
212
+ "logps/rejected": -13.490447100471048,
213
+ "loss": 0.5025,
214
+ "rewards/chosen": 0.0007689857234557469,
215
+ "rewards/margins": 0.0013605295552634724,
216
+ "rewards/rejected": -0.0005915438318077256,
217
+ "step": 16
218
+ },
219
+ {
220
+ "epoch": 0.10783487784330244,
221
+ "eval_kl": 0.009904867969453335,
222
+ "eval_logps/chosen": -12.143770363298767,
223
+ "eval_logps/rejected": -16.15631522168321,
224
+ "eval_loss": 0.5038444399833679,
225
+ "eval_rewards/chosen": 0.0003763478032142058,
226
+ "eval_rewards/margins": -0.00032021752057413065,
227
+ "eval_rewards/rejected": 0.0006965653237883364,
228
+ "eval_runtime": 116.6391,
229
+ "eval_samples_per_second": 4.287,
230
+ "eval_steps_per_second": 1.072,
231
+ "step": 16
232
+ },
233
+ {
234
+ "epoch": 0.11457455770850884,
235
+ "grad_norm": 0.9445685148239136,
236
+ "kl": 0.008397102355957031,
237
+ "learning_rate": 4.594594594594596e-06,
238
+ "logps/chosen": -12.696038448449338,
239
+ "logps/rejected": -13.775493990990423,
240
+ "loss": 0.502,
241
+ "rewards/chosen": 0.0011519584240335407,
242
+ "rewards/margins": 0.00034577670240390577,
243
+ "rewards/rejected": 0.000806181721629635,
244
+ "step": 17
245
+ },
246
+ {
247
+ "epoch": 0.12131423757371525,
248
+ "grad_norm": 1.1721104383468628,
249
+ "kl": 0.009495265781879425,
250
+ "learning_rate": 4.864864864864866e-06,
251
+ "logps/chosen": -12.26165771484375,
252
+ "logps/rejected": -15.170940977154356,
253
+ "loss": 0.5034,
254
+ "rewards/chosen": 0.0009342439833187288,
255
+ "rewards/margins": 0.00023290609990047218,
256
+ "rewards/rejected": 0.0007013378834182566,
257
+ "step": 18
258
+ },
259
+ {
260
+ "epoch": 0.12805391743892164,
261
+ "grad_norm": 0.5523513555526733,
262
+ "kl": 0.01127266138792038,
263
+ "learning_rate": 5.135135135135135e-06,
264
+ "logps/chosen": -12.93335693359375,
265
+ "logps/rejected": -14.879100310496796,
266
+ "loss": 0.5016,
267
+ "rewards/chosen": 0.0013011722266674042,
268
+ "rewards/margins": 0.000309275094515238,
269
+ "rewards/rejected": 0.0009918971321521662,
270
+ "step": 19
271
+ },
272
+ {
273
+ "epoch": 0.13479359730412804,
274
+ "grad_norm": 0.8755555748939514,
275
+ "kl": 0.012708976864814758,
276
+ "learning_rate": 5.405405405405406e-06,
277
+ "logps/chosen": -12.150137624432963,
278
+ "logps/rejected": -32.386067708333336,
279
+ "loss": 0.5043,
280
+ "rewards/chosen": 0.0006054952259986631,
281
+ "rewards/margins": -0.007545214171866401,
282
+ "rewards/rejected": 0.008150709397865065,
283
+ "step": 20
284
+ },
285
+ {
286
+ "epoch": 0.14153327716933445,
287
+ "grad_norm": 1.4405056238174438,
288
+ "kl": 0.010751791298389435,
289
+ "learning_rate": 5.675675675675676e-06,
290
+ "logps/chosen": -11.948696899414063,
291
+ "logps/rejected": -13.955949671128216,
292
+ "loss": 0.5057,
293
+ "rewards/chosen": 0.00015734033659100532,
294
+ "rewards/margins": -0.0011103917296756717,
295
+ "rewards/rejected": 0.001267732066266677,
296
+ "step": 21
297
+ },
298
+ {
299
+ "epoch": 0.14827295703454085,
300
+ "grad_norm": 1.1817187070846558,
301
+ "kl": 0.013703078031539917,
302
+ "learning_rate": 5.945945945945947e-06,
303
+ "logps/chosen": -12.940583172966452,
304
+ "logps/rejected": -14.576173909505208,
305
+ "loss": 0.5025,
306
+ "rewards/chosen": 0.001265945132164394,
307
+ "rewards/margins": -0.000688387855303054,
308
+ "rewards/rejected": 0.001954332987467448,
309
+ "step": 22
310
+ },
311
+ {
312
+ "epoch": 0.15501263689974726,
313
+ "grad_norm": 6.005044937133789,
314
+ "kl": 0.00823821872472763,
315
+ "learning_rate": 6.2162162162162164e-06,
316
+ "logps/chosen": -12.0301513671875,
317
+ "logps/rejected": -21.867945053998163,
318
+ "loss": 0.5057,
319
+ "rewards/chosen": 0.00015112324617803097,
320
+ "rewards/margins": -0.000886952123769066,
321
+ "rewards/rejected": 0.001038075369947097,
322
+ "step": 23
323
+ },
324
+ {
325
+ "epoch": 0.16175231676495366,
326
+ "grad_norm": 0.8405526876449585,
327
+ "kl": 0.015235595405101776,
328
+ "learning_rate": 6.486486486486487e-06,
329
+ "logps/chosen": -11.505650983537947,
330
+ "logps/rejected": -15.960313468143857,
331
+ "loss": 0.5035,
332
+ "rewards/chosen": 0.0016303594623293196,
333
+ "rewards/margins": 0.0006677421369576103,
334
+ "rewards/rejected": 0.0009626173253717093,
335
+ "step": 24
336
+ },
337
+ {
338
+ "epoch": 0.16849199663016007,
339
+ "grad_norm": 0.8300200700759888,
340
+ "kl": 0.0065659284591674805,
341
+ "learning_rate": 6.7567567567567575e-06,
342
+ "logps/chosen": -13.24988525390625,
343
+ "logps/rejected": -15.025803786057692,
344
+ "loss": 0.5027,
345
+ "rewards/chosen": 0.0017699988186359406,
346
+ "rewards/margins": 0.00020917305961633335,
347
+ "rewards/rejected": 0.0015608257590196072,
348
+ "step": 25
349
+ },
350
+ {
351
+ "epoch": 0.17523167649536647,
352
+ "grad_norm": 1.233988642692566,
353
+ "kl": 0.01195797324180603,
354
+ "learning_rate": 7.027027027027028e-06,
355
+ "logps/chosen": -13.01835239955357,
356
+ "logps/rejected": -14.520358381600216,
357
+ "loss": 0.5048,
358
+ "rewards/chosen": 0.0011187103177819934,
359
+ "rewards/margins": 0.00018990398407569664,
360
+ "rewards/rejected": 0.0009288063337062968,
361
+ "step": 26
362
+ },
363
+ {
364
+ "epoch": 0.18197135636057288,
365
+ "grad_norm": 1.1816742420196533,
366
+ "kl": 0.01915910840034485,
367
+ "learning_rate": 7.297297297297298e-06,
368
+ "logps/chosen": -11.739371405707466,
369
+ "logps/rejected": -14.137767246791295,
370
+ "loss": 0.5033,
371
+ "rewards/chosen": 0.0018767921460999383,
372
+ "rewards/margins": 0.00035807005469761194,
373
+ "rewards/rejected": 0.0015187220914023264,
374
+ "step": 27
375
+ },
376
+ {
377
+ "epoch": 0.18871103622577928,
378
+ "grad_norm": 0.6885544061660767,
379
+ "kl": 0.01914915442466736,
380
+ "learning_rate": 7.567567567567569e-06,
381
+ "logps/chosen": -11.961525656960227,
382
+ "logps/rejected": -14.571149272303428,
383
+ "loss": 0.5019,
384
+ "rewards/chosen": 0.00266437367959456,
385
+ "rewards/margins": 0.0002963455541392574,
386
+ "rewards/rejected": 0.0023680281254553027,
387
+ "step": 28
388
+ },
389
+ {
390
+ "epoch": 0.1954507160909857,
391
+ "grad_norm": 0.5657196640968323,
392
+ "kl": 0.014864258468151093,
393
+ "learning_rate": 7.837837837837838e-06,
394
+ "logps/chosen": -14.268377685546875,
395
+ "logps/rejected": -19.700423803084934,
396
+ "loss": 0.5008,
397
+ "rewards/chosen": 0.0028473290801048277,
398
+ "rewards/margins": -0.0012096259150749596,
399
+ "rewards/rejected": 0.004056954995179787,
400
+ "step": 29
401
+ },
402
+ {
403
+ "epoch": 0.2021903959561921,
404
+ "grad_norm": 0.7043168544769287,
405
+ "kl": 0.011253654956817627,
406
+ "learning_rate": 8.108108108108109e-06,
407
+ "logps/chosen": -12.092169761657715,
408
+ "logps/rejected": -21.373014450073242,
409
+ "loss": 0.501,
410
+ "rewards/chosen": 0.002662060549482703,
411
+ "rewards/margins": 0.0013133042957633734,
412
+ "rewards/rejected": 0.0013487562537193298,
413
+ "step": 30
414
+ },
415
+ {
416
+ "epoch": 0.20893007582139847,
417
+ "grad_norm": 2.2267584800720215,
418
+ "kl": 0.013516634702682495,
419
+ "learning_rate": 8.378378378378378e-06,
420
+ "logps/chosen": -12.272081928868447,
421
+ "logps/rejected": -14.440037582859848,
422
+ "loss": 0.5081,
423
+ "rewards/chosen": 0.0015497208843308111,
424
+ "rewards/margins": 0.0003958268897682575,
425
+ "rewards/rejected": 0.0011538939945625536,
426
+ "step": 31
427
+ },
428
+ {
429
+ "epoch": 0.21566975568660487,
430
+ "grad_norm": 0.6539870500564575,
431
+ "kl": 0.0161922425031662,
432
+ "learning_rate": 8.64864864864865e-06,
433
+ "logps/chosen": -12.081860710592832,
434
+ "logps/rejected": -14.507745361328125,
435
+ "loss": 0.502,
436
+ "rewards/chosen": 0.0034291958984206707,
437
+ "rewards/margins": 0.0014160188462804348,
438
+ "rewards/rejected": 0.002013177052140236,
439
+ "step": 32
440
+ },
441
+ {
442
+ "epoch": 0.21566975568660487,
443
+ "eval_kl": 0.01995524764060974,
444
+ "eval_logps/chosen": -12.114965053951794,
445
+ "eval_logps/rejected": -16.145021293998195,
446
+ "eval_loss": 0.5019354224205017,
447
+ "eval_rewards/chosen": 0.0032570009274333045,
448
+ "eval_rewards/margins": 0.0014309825096479906,
449
+ "eval_rewards/rejected": 0.001826018417785314,
450
+ "eval_runtime": 116.8406,
451
+ "eval_samples_per_second": 4.279,
452
+ "eval_steps_per_second": 1.07,
453
+ "step": 32
454
+ },
455
+ {
456
+ "epoch": 0.22240943555181128,
457
+ "grad_norm": 0.6553356051445007,
458
+ "kl": 0.01506737619638443,
459
+ "learning_rate": 8.91891891891892e-06,
460
+ "logps/chosen": -11.576536290785846,
461
+ "logps/rejected": -14.449583943684896,
462
+ "loss": 0.502,
463
+ "rewards/chosen": 0.00276046509251875,
464
+ "rewards/margins": -0.0007693189908476437,
465
+ "rewards/rejected": 0.003529784083366394,
466
+ "step": 33
467
+ },
468
+ {
469
+ "epoch": 0.22914911541701768,
470
+ "grad_norm": 0.7586061358451843,
471
+ "kl": 0.017700180411338806,
472
+ "learning_rate": 9.189189189189191e-06,
473
+ "logps/chosen": -11.496786117553711,
474
+ "logps/rejected": -15.68021011352539,
475
+ "loss": 0.5019,
476
+ "rewards/chosen": 0.004237835295498371,
477
+ "rewards/margins": 0.0013041330967098475,
478
+ "rewards/rejected": 0.0029337021987885237,
479
+ "step": 34
480
+ },
481
+ {
482
+ "epoch": 0.2358887952822241,
483
+ "grad_norm": 1.1288914680480957,
484
+ "kl": 0.029283612966537476,
485
+ "learning_rate": 9.45945945945946e-06,
486
+ "logps/chosen": -12.882062358240928,
487
+ "logps/rejected": -15.065287272135416,
488
+ "loss": 0.5022,
489
+ "rewards/chosen": 0.0016913585845501193,
490
+ "rewards/margins": -0.00014708854644063154,
491
+ "rewards/rejected": 0.0018384471309907508,
492
+ "step": 35
493
+ },
494
+ {
495
+ "epoch": 0.2426284751474305,
496
+ "grad_norm": 0.3856205940246582,
497
+ "kl": 0.02245330810546875,
498
+ "learning_rate": 9.729729729729732e-06,
499
+ "logps/chosen": -11.367819213867188,
500
+ "logps/rejected": -13.798005047966452,
501
+ "loss": 0.5009,
502
+ "rewards/chosen": 0.0032590890924135843,
503
+ "rewards/margins": 4.188570321775071e-06,
504
+ "rewards/rejected": 0.0032549005220918093,
505
+ "step": 36
506
+ },
507
+ {
508
+ "epoch": 0.2493681550126369,
509
+ "grad_norm": 1.6323754787445068,
510
+ "kl": 0.01760883629322052,
511
+ "learning_rate": 1e-05,
512
+ "logps/chosen": -11.14263124819155,
513
+ "logps/rejected": -14.40852809596706,
514
+ "loss": 0.5031,
515
+ "rewards/chosen": 0.00255692419078615,
516
+ "rewards/margins": 0.00025125865911220305,
517
+ "rewards/rejected": 0.0023056655316739468,
518
+ "step": 37
519
+ },
520
+ {
521
+ "epoch": 0.2561078348778433,
522
+ "grad_norm": 0.5301430821418762,
523
+ "kl": 0.022485479712486267,
524
+ "learning_rate": 9.99799753559161e-06,
525
+ "logps/chosen": -12.966288248697916,
526
+ "logps/rejected": -14.867413689108457,
527
+ "loss": 0.5015,
528
+ "rewards/chosen": 0.0032486026485761006,
529
+ "rewards/margins": -0.00023628765461491605,
530
+ "rewards/rejected": 0.0034848903031910166,
531
+ "step": 38
532
+ },
533
+ {
534
+ "epoch": 0.2628475147430497,
535
+ "grad_norm": 1.3813471794128418,
536
+ "kl": 0.015269860625267029,
537
+ "learning_rate": 9.991991746311916e-06,
538
+ "logps/chosen": -12.546598237136315,
539
+ "logps/rejected": -14.897487095424108,
540
+ "loss": 0.5059,
541
+ "rewards/chosen": 0.003317505121231079,
542
+ "rewards/margins": 0.0015055560639926366,
543
+ "rewards/rejected": 0.0018119490572384425,
544
+ "step": 39
545
+ },
546
+ {
547
+ "epoch": 0.2695871946082561,
548
+ "grad_norm": 0.9622092843055725,
549
+ "kl": 0.024058394134044647,
550
+ "learning_rate": 9.981987442712634e-06,
551
+ "logps/chosen": -12.606202915736608,
552
+ "logps/rejected": -14.283474626212284,
553
+ "loss": 0.5025,
554
+ "rewards/chosen": 0.0032653006059782845,
555
+ "rewards/margins": 0.0015468575904522036,
556
+ "rewards/rejected": 0.001718443015526081,
557
+ "step": 40
558
+ },
559
+ {
560
+ "epoch": 0.2763268744734625,
561
+ "grad_norm": 0.4365544617176056,
562
+ "kl": 0.022776372730731964,
563
+ "learning_rate": 9.967992638098517e-06,
564
+ "logps/chosen": -12.364286295572917,
565
+ "logps/rejected": -12.308985093060661,
566
+ "loss": 0.5005,
567
+ "rewards/chosen": 0.0036993456383546193,
568
+ "rewards/margins": 0.0010577291396318697,
569
+ "rewards/rejected": 0.0026416164987227496,
570
+ "step": 41
571
+ },
572
+ {
573
+ "epoch": 0.2830665543386689,
574
+ "grad_norm": 0.5576804876327515,
575
+ "kl": 0.020083852112293243,
576
+ "learning_rate": 9.950018542108818e-06,
577
+ "logps/chosen": -12.115999009874132,
578
+ "logps/rejected": -12.540671212332589,
579
+ "loss": 0.5015,
580
+ "rewards/chosen": 0.003624026974042257,
581
+ "rewards/margins": 0.0011748063954569047,
582
+ "rewards/rejected": 0.002449220578585352,
583
+ "step": 42
584
+ },
585
+ {
586
+ "epoch": 0.2898062342038753,
587
+ "grad_norm": 0.6428059935569763,
588
+ "kl": 0.02687143161892891,
589
+ "learning_rate": 9.928079551738542e-06,
590
+ "logps/chosen": -11.715484619140625,
591
+ "logps/rejected": -14.010598182678223,
592
+ "loss": 0.5018,
593
+ "rewards/chosen": 0.004161514341831207,
594
+ "rewards/margins": 0.0009207972325384617,
595
+ "rewards/rejected": 0.0032407171092927456,
596
+ "step": 43
597
+ },
598
+ {
599
+ "epoch": 0.2965459140690817,
600
+ "grad_norm": 0.5016953349113464,
601
+ "kl": 0.0221768319606781,
602
+ "learning_rate": 9.902193239806634e-06,
603
+ "logps/chosen": -11.717826843261719,
604
+ "logps/rejected": -15.60338020324707,
605
+ "loss": 0.5004,
606
+ "rewards/chosen": 0.004171658307313919,
607
+ "rewards/margins": 0.0014381594955921173,
608
+ "rewards/rejected": 0.0027334988117218018,
609
+ "step": 44
610
+ },
611
+ {
612
+ "epoch": 0.3032855939342881,
613
+ "grad_norm": 0.6543410420417786,
614
+ "kl": 0.025154344737529755,
615
+ "learning_rate": 9.872380340880416e-06,
616
+ "logps/chosen": -12.942235946655273,
617
+ "logps/rejected": -14.092964172363281,
618
+ "loss": 0.5008,
619
+ "rewards/chosen": 0.004342000465840101,
620
+ "rewards/margins": 0.0014070440083742142,
621
+ "rewards/rejected": 0.002934956457465887,
622
+ "step": 45
623
+ },
624
+ {
625
+ "epoch": 0.3100252737994945,
626
+ "grad_norm": 0.8556452393531799,
627
+ "kl": 0.03452587127685547,
628
+ "learning_rate": 9.838664734667496e-06,
629
+ "logps/chosen": -10.915324244005927,
630
+ "logps/rejected": -14.422269984654017,
631
+ "loss": 0.5012,
632
+ "rewards/chosen": 0.003742930190316562,
633
+ "rewards/margins": 0.0028417849151665354,
634
+ "rewards/rejected": 0.0009011452751500266,
635
+ "step": 46
636
+ },
637
+ {
638
+ "epoch": 0.3167649536647009,
639
+ "grad_norm": 0.4844423234462738,
640
+ "kl": 0.028471767902374268,
641
+ "learning_rate": 9.801073426888447e-06,
642
+ "logps/chosen": -11.690447407384072,
643
+ "logps/rejected": -14.260365804036459,
644
+ "loss": 0.5007,
645
+ "rewards/chosen": 0.0039828251446447065,
646
+ "rewards/margins": -0.00019254163271753902,
647
+ "rewards/rejected": 0.0041753667773622456,
648
+ "step": 47
649
+ },
650
+ {
651
+ "epoch": 0.3235046335299073,
652
+ "grad_norm": 0.6494544148445129,
653
+ "kl": 0.027864396572113037,
654
+ "learning_rate": 9.759636527645633e-06,
655
+ "logps/chosen": -12.518224080403646,
656
+ "logps/rejected": -14.488565717424665,
657
+ "loss": 0.5026,
658
+ "rewards/chosen": 0.003701325919893053,
659
+ "rewards/margins": 0.0002690214545480788,
660
+ "rewards/rejected": 0.003432304465344974,
661
+ "step": 48
662
+ },
663
+ {
664
+ "epoch": 0.3235046335299073,
665
+ "eval_kl": 0.03345843777060509,
666
+ "eval_logps/chosen": -12.096400513242713,
667
+ "eval_logps/rejected": -16.13582151116877,
668
+ "eval_loss": 0.5012729167938232,
669
+ "eval_rewards/chosen": 0.005113235503568778,
670
+ "eval_rewards/margins": 0.002367174851576145,
671
+ "eval_rewards/rejected": 0.0027460606519926324,
672
+ "eval_runtime": 117.0064,
673
+ "eval_samples_per_second": 4.273,
674
+ "eval_steps_per_second": 1.068,
675
+ "step": 48
676
+ },
677
+ {
678
+ "epoch": 0.33024431339511373,
679
+ "grad_norm": 0.47848811745643616,
680
+ "kl": 0.02880626916885376,
681
+ "learning_rate": 9.714387227305422e-06,
682
+ "logps/chosen": -11.936473301478795,
683
+ "logps/rejected": -14.11018541124132,
684
+ "loss": 0.5007,
685
+ "rewards/chosen": 0.003300180658698082,
686
+ "rewards/margins": -0.00017501186165544717,
687
+ "rewards/rejected": 0.003475192520353529,
688
+ "step": 49
689
+ },
690
+ {
691
+ "epoch": 0.33698399326032014,
692
+ "grad_norm": 1.9203778505325317,
693
+ "kl": 0.03367514908313751,
694
+ "learning_rate": 9.665361769913187e-06,
695
+ "logps/chosen": -11.758908658414274,
696
+ "logps/rejected": -14.825486924913195,
697
+ "loss": 0.5021,
698
+ "rewards/chosen": 0.003634077471655768,
699
+ "rewards/margins": -7.272407308116452e-05,
700
+ "rewards/rejected": 0.0037068015447369327,
701
+ "step": 50
702
+ },
703
+ {
704
+ "epoch": 0.34372367312552654,
705
+ "grad_norm": 0.5142046809196472,
706
+ "kl": 0.054336875677108765,
707
+ "learning_rate": 9.612599424162344e-06,
708
+ "logps/chosen": -12.71717924230239,
709
+ "logps/rejected": -13.75819091796875,
710
+ "loss": 0.5002,
711
+ "rewards/chosen": 0.004144738702213063,
712
+ "rewards/margins": 0.0004953872047218622,
713
+ "rewards/rejected": 0.003649351497491201,
714
+ "step": 51
715
+ },
716
+ {
717
+ "epoch": 0.35046335299073295,
718
+ "grad_norm": 5.670271873474121,
719
+ "kl": 0.03541412204504013,
720
+ "learning_rate": 9.55614245194068e-06,
721
+ "logps/chosen": -11.491986674647178,
722
+ "logps/rejected": -13.510441635594223,
723
+ "loss": 0.5083,
724
+ "rewards/chosen": 0.0023518490695184275,
725
+ "rewards/margins": -0.003151654592648746,
726
+ "rewards/rejected": 0.005503503662167173,
727
+ "step": 52
728
+ },
729
+ {
730
+ "epoch": 0.35720303285593935,
731
+ "grad_norm": 0.394388347864151,
732
+ "kl": 0.030839860439300537,
733
+ "learning_rate": 9.496036074479184e-06,
734
+ "logps/chosen": -12.102163380589978,
735
+ "logps/rejected": -14.92567138671875,
736
+ "loss": 0.5006,
737
+ "rewards/chosen": 0.0048105048722234265,
738
+ "rewards/margins": 0.0003774762594053894,
739
+ "rewards/rejected": 0.004433028612818037,
740
+ "step": 53
741
+ },
742
+ {
743
+ "epoch": 0.36394271272114576,
744
+ "grad_norm": 0.6340323090553284,
745
+ "kl": 0.024696357548236847,
746
+ "learning_rate": 9.432328436130493e-06,
747
+ "logps/chosen": -12.709688186645508,
748
+ "logps/rejected": -13.567533493041992,
749
+ "loss": 0.5013,
750
+ "rewards/chosen": 0.005041294265538454,
751
+ "rewards/margins": 0.0015526015777140856,
752
+ "rewards/rejected": 0.0034886926878243685,
753
+ "step": 54
754
+ },
755
+ {
756
+ "epoch": 0.37068239258635216,
757
+ "grad_norm": 1.0442602634429932,
758
+ "kl": 0.030310869216918945,
759
+ "learning_rate": 9.365070565805941e-06,
760
+ "logps/chosen": -11.8351806640625,
761
+ "logps/rejected": -12.171940244477371,
762
+ "loss": 0.5021,
763
+ "rewards/chosen": 0.0029652584876332963,
764
+ "rewards/margins": -0.0006688115264981841,
765
+ "rewards/rejected": 0.0036340700141314804,
766
+ "step": 55
767
+ },
768
+ {
769
+ "epoch": 0.37742207245155857,
770
+ "grad_norm": 0.41058778762817383,
771
+ "kl": 0.03595775365829468,
772
+ "learning_rate": 9.294316336102132e-06,
773
+ "logps/chosen": -12.58010056439568,
774
+ "logps/rejected": -16.123766072591145,
775
+ "loss": 0.5008,
776
+ "rewards/chosen": 0.00409250268164803,
777
+ "rewards/margins": 0.00040192935688822913,
778
+ "rewards/rejected": 0.003690573324759801,
779
+ "step": 56
780
+ },
781
+ {
782
+ "epoch": 0.38416175231676497,
783
+ "grad_norm": 0.08168064057826996,
784
+ "kl": 0.031137794256210327,
785
+ "learning_rate": 9.220122420149753e-06,
786
+ "logps/chosen": -12.088414510091146,
787
+ "logps/rejected": -15.25335693359375,
788
+ "loss": 0.5,
789
+ "rewards/chosen": 0.005210169156392415,
790
+ "rewards/margins": -0.0003060570826717455,
791
+ "rewards/rejected": 0.005516226239064161,
792
+ "step": 57
793
+ },
794
+ {
795
+ "epoch": 0.3909014321819714,
796
+ "grad_norm": 0.6683095693588257,
797
+ "kl": 0.03323546051979065,
798
+ "learning_rate": 9.142548246219212e-06,
799
+ "logps/chosen": -12.333134831608954,
800
+ "logps/rejected": -12.924123128255209,
801
+ "loss": 0.5016,
802
+ "rewards/chosen": 0.003185961697552655,
803
+ "rewards/margins": -0.0001546228388408285,
804
+ "rewards/rejected": 0.0033405845363934836,
805
+ "step": 58
806
+ },
807
+ {
808
+ "epoch": 0.3976411120471778,
809
+ "grad_norm": 0.44280490279197693,
810
+ "kl": 0.040286846458911896,
811
+ "learning_rate": 9.06165595011943e-06,
812
+ "logps/chosen": -11.647547040666852,
813
+ "logps/rejected": -13.499364217122396,
814
+ "loss": 0.5015,
815
+ "rewards/chosen": 0.004377785005739757,
816
+ "rewards/margins": -0.0010754091753846125,
817
+ "rewards/rejected": 0.00545319418112437,
818
+ "step": 59
819
+ },
820
+ {
821
+ "epoch": 0.4043807919123842,
822
+ "grad_norm": 0.253917396068573,
823
+ "kl": 0.02998022735118866,
824
+ "learning_rate": 8.97751032542795e-06,
825
+ "logps/chosen": -10.375978168688322,
826
+ "logps/rejected": -14.37469012920673,
827
+ "loss": 0.4998,
828
+ "rewards/chosen": 0.005374729241195478,
829
+ "rewards/margins": 0.0037813447792882375,
830
+ "rewards/rejected": 0.00159338446190724,
831
+ "step": 60
832
+ },
833
+ {
834
+ "epoch": 0.4111204717775906,
835
+ "grad_norm": 0.22803018987178802,
836
+ "kl": 0.0411323606967926,
837
+ "learning_rate": 8.890178771592198e-06,
838
+ "logps/chosen": -12.261529541015625,
839
+ "logps/rejected": -14.262212416704964,
840
+ "loss": 0.5007,
841
+ "rewards/chosen": 0.0042099177837371824,
842
+ "rewards/margins": -0.00034529444049386394,
843
+ "rewards/rejected": 0.004555212224231046,
844
+ "step": 61
845
+ },
846
+ {
847
+ "epoch": 0.41786015164279694,
848
+ "grad_norm": 0.6412160992622375,
849
+ "kl": 0.03793022781610489,
850
+ "learning_rate": 8.799731239943488e-06,
851
+ "logps/chosen": -12.091997034409466,
852
+ "logps/rejected": -15.869469197591146,
853
+ "loss": 0.5016,
854
+ "rewards/chosen": 0.003918992245898527,
855
+ "rewards/margins": -4.28084064932506e-06,
856
+ "rewards/rejected": 0.003923273086547852,
857
+ "step": 62
858
+ },
859
+ {
860
+ "epoch": 0.42459983150800334,
861
+ "grad_norm": 0.8344623446464539,
862
+ "kl": 0.04702252149581909,
863
+ "learning_rate": 8.706240177667003e-06,
864
+ "logps/chosen": -11.470931159125435,
865
+ "logps/rejected": -14.442997523716517,
866
+ "loss": 0.5058,
867
+ "rewards/chosen": 0.0035239855448404946,
868
+ "rewards/margins": -0.0012164868059612454,
869
+ "rewards/rejected": 0.00474047235080174,
870
+ "step": 63
871
+ },
872
+ {
873
+ "epoch": 0.43133951137320975,
874
+ "grad_norm": 0.8020732402801514,
875
+ "kl": 0.039898380637168884,
876
+ "learning_rate": 8.609780469772623e-06,
877
+ "logps/chosen": -14.423152378627233,
878
+ "logps/rejected": -14.408963290127842,
879
+ "loss": 0.5021,
880
+ "rewards/chosen": 0.004889163587774549,
881
+ "rewards/margins": 0.0007666330542657283,
882
+ "rewards/rejected": 0.004122530533508821,
883
+ "step": 64
884
+ },
885
+ {
886
+ "epoch": 0.43133951137320975,
887
+ "eval_kl": 0.04057914763689041,
888
+ "eval_logps/chosen": -12.08929306509249,
889
+ "eval_logps/rejected": -16.12696370148917,
890
+ "eval_loss": 0.5015159249305725,
891
+ "eval_rewards/chosen": 0.005824030782075207,
892
+ "eval_rewards/margins": 0.0021923597992461368,
893
+ "eval_rewards/rejected": 0.00363167098282907,
894
+ "eval_runtime": 117.006,
895
+ "eval_samples_per_second": 4.273,
896
+ "eval_steps_per_second": 1.068,
897
+ "step": 64
898
+ },
899
+ {
900
+ "epoch": 0.43807919123841615,
901
+ "grad_norm": 0.4794953465461731,
902
+ "kl": 0.06649504601955414,
903
+ "learning_rate": 8.510429379113114e-06,
904
+ "logps/chosen": -12.707015991210938,
905
+ "logps/rejected": -14.950631534352022,
906
+ "loss": 0.5008,
907
+ "rewards/chosen": 0.004046256840229035,
908
+ "rewards/margins": 0.0002097017624798944,
909
+ "rewards/rejected": 0.00383655507774914,
910
+ "step": 65
911
+ },
912
+ {
913
+ "epoch": 0.44481887110362256,
914
+ "grad_norm": 0.3255169689655304,
915
+ "kl": 0.03476836532354355,
916
+ "learning_rate": 8.408266484497664e-06,
917
+ "logps/chosen": -12.403576190655048,
918
+ "logps/rejected": -14.6284035130551,
919
+ "loss": 0.4999,
920
+ "rewards/chosen": 0.008390213434512798,
921
+ "rewards/margins": 0.004481334163833725,
922
+ "rewards/rejected": 0.0039088792706790726,
923
+ "step": 66
924
+ },
925
+ {
926
+ "epoch": 0.45155855096882896,
927
+ "grad_norm": 0.5565071105957031,
928
+ "kl": 0.036111876368522644,
929
+ "learning_rate": 8.303373616950408e-06,
930
+ "logps/chosen": -12.935804578993055,
931
+ "logps/rejected": -15.007106236049108,
932
+ "loss": 0.5002,
933
+ "rewards/chosen": 0.004996686346001095,
934
+ "rewards/margins": 0.0004498769366551962,
935
+ "rewards/rejected": 0.004546809409345899,
936
+ "step": 67
937
+ },
938
+ {
939
+ "epoch": 0.45829823083403537,
940
+ "grad_norm": 0.49582043290138245,
941
+ "kl": 0.03154793381690979,
942
+ "learning_rate": 8.195834794164925e-06,
943
+ "logps/chosen": -12.002599225725447,
944
+ "logps/rejected": -14.013414054081357,
945
+ "loss": 0.501,
946
+ "rewards/chosen": 0.00556163489818573,
947
+ "rewards/margins": 0.000681957808034173,
948
+ "rewards/rejected": 0.004879677090151557,
949
+ "step": 68
950
+ },
951
+ {
952
+ "epoch": 0.4650379106992418,
953
+ "grad_norm": 0.5658519268035889,
954
+ "kl": 0.029201030731201172,
955
+ "learning_rate": 8.085736153207277e-06,
956
+ "logps/chosen": -12.031069587258731,
957
+ "logps/rejected": -14.41396484375,
958
+ "loss": 0.5007,
959
+ "rewards/chosen": 0.004736459868795732,
960
+ "rewards/margins": -9.984157833398469e-06,
961
+ "rewards/rejected": 0.00474644402662913,
962
+ "step": 69
963
+ },
964
+ {
965
+ "epoch": 0.4717775905644482,
966
+ "grad_norm": 0.3203012943267822,
967
+ "kl": 0.027451664209365845,
968
+ "learning_rate": 7.973165881521435e-06,
969
+ "logps/chosen": -11.853369140625,
970
+ "logps/rejected": -13.254534491177264,
971
+ "loss": 0.5003,
972
+ "rewards/chosen": 0.005844617741448539,
973
+ "rewards/margins": -0.0013138286732687736,
974
+ "rewards/rejected": 0.007158446414717312,
975
+ "step": 70
976
+ },
977
+ {
978
+ "epoch": 0.4785172704296546,
979
+ "grad_norm": 0.360921710729599,
980
+ "kl": 0.03561514616012573,
981
+ "learning_rate": 7.858214146292394e-06,
982
+ "logps/chosen": -12.516302926199776,
983
+ "logps/rejected": -14.650658501519096,
984
+ "loss": 0.5001,
985
+ "rewards/chosen": 0.005021435873849052,
986
+ "rewards/margins": 0.001271195236652617,
987
+ "rewards/rejected": 0.0037502406371964347,
988
+ "step": 71
989
+ },
990
+ {
991
+ "epoch": 0.485256950294861,
992
+ "grad_norm": 0.6086096167564392,
993
+ "kl": 0.04020478576421738,
994
+ "learning_rate": 7.74097302222355e-06,
995
+ "logps/chosen": -11.879875921433971,
996
+ "logps/rejected": -15.616689970999053,
997
+ "loss": 0.5002,
998
+ "rewards/chosen": 0.004931771466808934,
999
+ "rewards/margins": 0.0010645643153381719,
1000
+ "rewards/rejected": 0.0038672071514707623,
1001
+ "step": 72
1002
+ },
1003
+ {
1004
+ "epoch": 0.4919966301600674,
1005
+ "grad_norm": 0.19885578751564026,
1006
+ "kl": 0.026517115533351898,
1007
+ "learning_rate": 7.621536417786159e-06,
1008
+ "logps/chosen": -11.950851440429688,
1009
+ "logps/rejected": -13.586245368508731,
1010
+ "loss": 0.5004,
1011
+ "rewards/chosen": 0.004206822315851847,
1012
+ "rewards/margins": 0.0002687697317086011,
1013
+ "rewards/rejected": 0.003938052584143246,
1014
+ "step": 73
1015
+ },
1016
+ {
1017
+ "epoch": 0.4987363100252738,
1018
+ "grad_norm": 0.6865639090538025,
1019
+ "kl": 0.04328171908855438,
1020
+ "learning_rate": 7.500000000000001e-06,
1021
+ "logps/chosen": -10.888592311314174,
1022
+ "logps/rejected": -13.478512234157986,
1023
+ "loss": 0.5006,
1024
+ "rewards/chosen": 0.00372953899204731,
1025
+ "rewards/margins": -0.0011417826430665124,
1026
+ "rewards/rejected": 0.004871321635113822,
1027
+ "step": 74
1028
+ },
1029
+ {
1030
+ "epoch": 0.5054759898904801,
1031
+ "grad_norm": 0.9542965888977051,
1032
+ "kl": 0.03713301569223404,
1033
+ "learning_rate": 7.37646111780545e-06,
1034
+ "logps/chosen": -12.198631286621094,
1035
+ "logps/rejected": -14.544124603271484,
1036
+ "loss": 0.501,
1037
+ "rewards/chosen": 0.0044771101325750354,
1038
+ "rewards/margins": 0.0009494200969735783,
1039
+ "rewards/rejected": 0.003527690035601457,
1040
+ "step": 75
1041
+ },
1042
+ {
1043
+ "epoch": 0.5122156697556866,
1044
+ "grad_norm": 0.2528373599052429,
1045
+ "kl": 0.027506500482559204,
1046
+ "learning_rate": 7.251018724088367e-06,
1047
+ "logps/chosen": -13.080149332682291,
1048
+ "logps/rejected": -13.036513869826859,
1049
+ "loss": 0.5005,
1050
+ "rewards/chosen": 0.006512647425686872,
1051
+ "rewards/margins": 0.0014634541980735778,
1052
+ "rewards/rejected": 0.005049193227613294,
1053
+ "step": 76
1054
+ },
1055
+ {
1056
+ "epoch": 0.518955349620893,
1057
+ "grad_norm": 0.22596140205860138,
1058
+ "kl": 0.040385909378528595,
1059
+ "learning_rate": 7.12377329642024e-06,
1060
+ "logps/chosen": -11.61421907865084,
1061
+ "logps/rejected": -15.499625758120889,
1062
+ "loss": 0.4998,
1063
+ "rewards/chosen": 0.00586964304630573,
1064
+ "rewards/margins": 0.002287208219530129,
1065
+ "rewards/rejected": 0.003582434826775601,
1066
+ "step": 77
1067
+ },
1068
+ {
1069
+ "epoch": 0.5256950294860994,
1070
+ "grad_norm": 0.2265264242887497,
1071
+ "kl": 0.02709801495075226,
1072
+ "learning_rate": 6.994826756577082e-06,
1073
+ "logps/chosen": -10.6973180356233,
1074
+ "logps/rejected": -12.833906964557928,
1075
+ "loss": 0.5005,
1076
+ "rewards/chosen": 0.004355713725090027,
1077
+ "rewards/margins": 0.00035547337880948663,
1078
+ "rewards/rejected": 0.00400024034628054,
1079
+ "step": 78
1080
+ },
1081
+ {
1082
+ "epoch": 0.5324347093513058,
1083
+ "grad_norm": 0.49220699071884155,
1084
+ "kl": 0.026310235261917114,
1085
+ "learning_rate": 6.864282388901544e-06,
1086
+ "logps/chosen": -14.165910391971982,
1087
+ "logps/rejected": -19.099337332589286,
1088
+ "loss": 0.5,
1089
+ "rewards/chosen": 0.004868762246493636,
1090
+ "rewards/margins": 0.002894112944896586,
1091
+ "rewards/rejected": 0.00197464930159705,
1092
+ "step": 79
1093
+ },
1094
+ {
1095
+ "epoch": 0.5391743892165122,
1096
+ "grad_norm": 0.6800411343574524,
1097
+ "kl": 0.046130433678627014,
1098
+ "learning_rate": 6.732244757573619e-06,
1099
+ "logps/chosen": -12.125639073988971,
1100
+ "logps/rejected": -13.411381022135417,
1101
+ "loss": 0.5017,
1102
+ "rewards/chosen": 0.005388632416725159,
1103
+ "rewards/margins": 0.000751591225465139,
1104
+ "rewards/rejected": 0.00463704119126002,
1105
+ "step": 80
1106
+ },
1107
+ {
1108
+ "epoch": 0.5391743892165122,
1109
+ "eval_kl": 0.04340193793177605,
1110
+ "eval_logps/chosen": -12.083256332329036,
1111
+ "eval_logps/rejected": -16.12648952222473,
1112
+ "eval_loss": 0.501190721988678,
1113
+ "eval_rewards/chosen": 0.006427717315776466,
1114
+ "eval_rewards/margins": 0.0027484710674554836,
1115
+ "eval_rewards/rejected": 0.0036792462483209824,
1116
+ "eval_runtime": 117.1394,
1117
+ "eval_samples_per_second": 4.268,
1118
+ "eval_steps_per_second": 1.067,
1119
+ "step": 80
1120
+ },
1121
+ {
1122
+ "epoch": 0.5459140690817186,
1123
+ "grad_norm": 0.6403890252113342,
1124
+ "kl": 0.0381980761885643,
1125
+ "learning_rate": 6.598819622856227e-06,
1126
+ "logps/chosen": -12.278984656700722,
1127
+ "logps/rejected": -14.581547787314967,
1128
+ "loss": 0.5024,
1129
+ "rewards/chosen": 0.005194329871581151,
1130
+ "rewards/margins": 0.000755395906174231,
1131
+ "rewards/rejected": 0.00443893396540692,
1132
+ "step": 81
1133
+ },
1134
+ {
1135
+ "epoch": 0.552653748946925,
1136
+ "grad_norm": 0.3272389769554138,
1137
+ "kl": 0.029798954725265503,
1138
+ "learning_rate": 6.464113856382752e-06,
1139
+ "logps/chosen": -11.796954530658144,
1140
+ "logps/rejected": -14.982394310735888,
1141
+ "loss": 0.5005,
1142
+ "rewards/chosen": 0.005100182511589744,
1143
+ "rewards/margins": 0.000986845306287413,
1144
+ "rewards/rejected": 0.004113337205302331,
1145
+ "step": 82
1146
+ },
1147
+ {
1148
+ "epoch": 0.5593934288121314,
1149
+ "grad_norm": 0.549321711063385,
1150
+ "kl": 0.05309329181909561,
1151
+ "learning_rate": 6.328235355554382e-06,
1152
+ "logps/chosen": -12.00343715122768,
1153
+ "logps/rejected": -15.50714742726293,
1154
+ "loss": 0.5003,
1155
+ "rewards/chosen": 0.005881215419088091,
1156
+ "rewards/margins": 0.003232161487851824,
1157
+ "rewards/rejected": 0.002649053931236267,
1158
+ "step": 83
1159
+ },
1160
+ {
1161
+ "epoch": 0.5661331086773378,
1162
+ "grad_norm": 0.6684555411338806,
1163
+ "kl": 0.035583049058914185,
1164
+ "learning_rate": 6.191292957115825e-06,
1165
+ "logps/chosen": -12.240760294596354,
1166
+ "logps/rejected": -15.023827945484834,
1167
+ "loss": 0.5017,
1168
+ "rewards/chosen": 0.003915249804655711,
1169
+ "rewards/margins": -0.0005642145579936456,
1170
+ "rewards/rejected": 0.004479464362649357,
1171
+ "step": 84
1172
+ },
1173
+ {
1174
+ "epoch": 0.5728727885425442,
1175
+ "grad_norm": 1.0044106245040894,
1176
+ "kl": 0.04308219999074936,
1177
+ "learning_rate": 6.053396349978632e-06,
1178
+ "logps/chosen": -11.776771791519657,
1179
+ "logps/rejected": -12.308844364050662,
1180
+ "loss": 0.5021,
1181
+ "rewards/chosen": 0.004112924779615094,
1182
+ "rewards/margins": 0.000594291009394887,
1183
+ "rewards/rejected": 0.0035186337702202072,
1184
+ "step": 85
1185
+ },
1186
+ {
1187
+ "epoch": 0.5796124684077506,
1188
+ "grad_norm": 0.4596560299396515,
1189
+ "kl": 0.033609502017498016,
1190
+ "learning_rate": 5.914655987361934e-06,
1191
+ "logps/chosen": -11.814070268110795,
1192
+ "logps/rejected": -13.036851452242944,
1193
+ "loss": 0.5013,
1194
+ "rewards/chosen": 0.0055642552448041515,
1195
+ "rewards/margins": 0.002487225062919386,
1196
+ "rewards/rejected": 0.0030770301818847656,
1197
+ "step": 86
1198
+ },
1199
+ {
1200
+ "epoch": 0.586352148272957,
1201
+ "grad_norm": 0.3680490255355835,
1202
+ "kl": 0.037708625197410583,
1203
+ "learning_rate": 5.77518299832099e-06,
1204
+ "logps/chosen": -11.723768967848558,
1205
+ "logps/rejected": -13.26518410130551,
1206
+ "loss": 0.5004,
1207
+ "rewards/chosen": 0.004937492884122408,
1208
+ "rewards/margins": -0.00045244181566392837,
1209
+ "rewards/rejected": 0.005389934699786336,
1210
+ "step": 87
1211
+ },
1212
+ {
1213
+ "epoch": 0.5930918281381634,
1214
+ "grad_norm": 0.5304110050201416,
1215
+ "kl": 0.05044492334127426,
1216
+ "learning_rate": 5.635089098734394e-06,
1217
+ "logps/chosen": -13.223031997680664,
1218
+ "logps/rejected": -14.862555503845215,
1219
+ "loss": 0.5013,
1220
+ "rewards/chosen": 0.00417838990688324,
1221
+ "rewards/margins": 0.00042380671948194504,
1222
+ "rewards/rejected": 0.0037545831874012947,
1223
+ "step": 88
1224
+ },
1225
+ {
1226
+ "epoch": 0.5998315080033698,
1227
+ "grad_norm": 0.41980600357055664,
1228
+ "kl": 0.03524252772331238,
1229
+ "learning_rate": 5.49448650182125e-06,
1230
+ "logps/chosen": -12.427776439769849,
1231
+ "logps/rejected": -13.90366843894676,
1232
+ "loss": 0.4999,
1233
+ "rewards/chosen": 0.006115401918823655,
1234
+ "rewards/margins": 0.0020641214004508013,
1235
+ "rewards/rejected": 0.004051280518372853,
1236
+ "step": 89
1237
+ },
1238
+ {
1239
+ "epoch": 0.6065711878685762,
1240
+ "grad_norm": 0.5898346304893494,
1241
+ "kl": 0.031301796436309814,
1242
+ "learning_rate": 5.353487828259973e-06,
1243
+ "logps/chosen": -11.024669647216797,
1244
+ "logps/rejected": -21.888626098632812,
1245
+ "loss": 0.5011,
1246
+ "rewards/chosen": 0.00403784541413188,
1247
+ "rewards/margins": 0.006319438107311726,
1248
+ "rewards/rejected": -0.002281592693179846,
1249
+ "step": 90
1250
+ },
1251
+ {
1252
+ "epoch": 0.6133108677337826,
1253
+ "grad_norm": 0.35757502913475037,
1254
+ "kl": 0.03623806685209274,
1255
+ "learning_rate": 5.212206015980742e-06,
1256
+ "logps/chosen": -12.488767736098346,
1257
+ "logps/rejected": -15.077747599283855,
1258
+ "loss": 0.5011,
1259
+ "rewards/chosen": 0.004482402959290673,
1260
+ "rewards/margins": 0.0013234442063406403,
1261
+ "rewards/rejected": 0.0031589587529500325,
1262
+ "step": 91
1263
+ },
1264
+ {
1265
+ "epoch": 0.620050547598989,
1266
+ "grad_norm": 0.42492297291755676,
1267
+ "kl": 0.03657727688550949,
1268
+ "learning_rate": 5.070754229703811e-06,
1269
+ "logps/chosen": -12.937924194335938,
1270
+ "logps/rejected": -15.475282556870404,
1271
+ "loss": 0.5009,
1272
+ "rewards/chosen": 0.006935717165470123,
1273
+ "rewards/margins": 0.002569316239917979,
1274
+ "rewards/rejected": 0.004366400925552144,
1275
+ "step": 92
1276
+ },
1277
+ {
1278
+ "epoch": 0.6267902274641954,
1279
+ "grad_norm": 0.5660274028778076,
1280
+ "kl": 0.04550507664680481,
1281
+ "learning_rate": 4.929245770296191e-06,
1282
+ "logps/chosen": -11.731661478678385,
1283
+ "logps/rejected": -15.350058419363839,
1284
+ "loss": 0.5016,
1285
+ "rewards/chosen": 0.004625084913439221,
1286
+ "rewards/margins": 0.001465273725371512,
1287
+ "rewards/rejected": 0.0031598111880677088,
1288
+ "step": 93
1289
+ },
1290
+ {
1291
+ "epoch": 0.6335299073294018,
1292
+ "grad_norm": 0.6102348566055298,
1293
+ "kl": 0.02795557677745819,
1294
+ "learning_rate": 4.78779398401926e-06,
1295
+ "logps/chosen": -12.868092256433824,
1296
+ "logps/rejected": -15.27595723470052,
1297
+ "loss": 0.5015,
1298
+ "rewards/chosen": 0.00481748317970949,
1299
+ "rewards/margins": 0.0005204015502742691,
1300
+ "rewards/rejected": 0.004297081629435221,
1301
+ "step": 94
1302
+ },
1303
+ {
1304
+ "epoch": 0.6402695871946082,
1305
+ "grad_norm": 0.34281784296035767,
1306
+ "kl": 0.030268676578998566,
1307
+ "learning_rate": 4.646512171740028e-06,
1308
+ "logps/chosen": -12.013111943783967,
1309
+ "logps/rejected": -14.691796279535062,
1310
+ "loss": 0.5016,
1311
+ "rewards/chosen": 0.0030449654745019,
1312
+ "rewards/margins": -0.0003336467646844563,
1313
+ "rewards/rejected": 0.0033786122391863565,
1314
+ "step": 95
1315
+ },
1316
+ {
1317
+ "epoch": 0.6470092670598147,
1318
+ "grad_norm": 0.4762212932109833,
1319
+ "kl": 0.041235461831092834,
1320
+ "learning_rate": 4.505513498178752e-06,
1321
+ "logps/chosen": -12.398789760044643,
1322
+ "logps/rejected": -16.021841114964978,
1323
+ "loss": 0.5003,
1324
+ "rewards/chosen": 0.006167218514851161,
1325
+ "rewards/margins": 0.0024321594775603907,
1326
+ "rewards/rejected": 0.0037350590372907705,
1327
+ "step": 96
1328
+ },
1329
+ {
1330
+ "epoch": 0.6470092670598147,
1331
+ "eval_kl": 0.04306062310934067,
1332
+ "eval_logps/chosen": -12.08118387401906,
1333
+ "eval_logps/rejected": -16.131139651962997,
1334
+ "eval_loss": 0.5007099509239197,
1335
+ "eval_rewards/chosen": 0.006635084815089478,
1336
+ "eval_rewards/margins": 0.003420951039467933,
1337
+ "eval_rewards/rejected": 0.003214133775621545,
1338
+ "eval_runtime": 117.1734,
1339
+ "eval_samples_per_second": 4.267,
1340
+ "eval_steps_per_second": 1.067,
1341
+ "step": 96
1342
+ },
1343
+ {
1344
+ "epoch": 0.6537489469250211,
1345
+ "grad_norm": 0.6140352487564087,
1346
+ "kl": 0.024514369666576385,
1347
+ "learning_rate": 4.364910901265607e-06,
1348
+ "logps/chosen": -12.712422688802084,
1349
+ "logps/rejected": -15.036084856305804,
1350
+ "loss": 0.5012,
1351
+ "rewards/chosen": 0.005062463382879893,
1352
+ "rewards/margins": 0.0017749588226988206,
1353
+ "rewards/rejected": 0.0032875045601810727,
1354
+ "step": 97
1355
+ },
1356
+ {
1357
+ "epoch": 0.6604886267902275,
1358
+ "grad_norm": 1.0478788614273071,
1359
+ "kl": 0.0483347624540329,
1360
+ "learning_rate": 4.224817001679011e-06,
1361
+ "logps/chosen": -12.04401277240954,
1362
+ "logps/rejected": -15.022868229792667,
1363
+ "loss": 0.504,
1364
+ "rewards/chosen": 0.0031436879776026074,
1365
+ "rewards/margins": -0.0013222903284708012,
1366
+ "rewards/rejected": 0.004465978306073409,
1367
+ "step": 98
1368
+ },
1369
+ {
1370
+ "epoch": 0.6672283066554339,
1371
+ "grad_norm": 0.26045116782188416,
1372
+ "kl": 0.04508065804839134,
1373
+ "learning_rate": 4.085344012638067e-06,
1374
+ "logps/chosen": -12.453051017992424,
1375
+ "logps/rejected": -13.936600223664314,
1376
+ "loss": 0.4996,
1377
+ "rewards/chosen": 0.006089657545089722,
1378
+ "rewards/margins": 0.002818903375056482,
1379
+ "rewards/rejected": 0.0032707541700332395,
1380
+ "step": 99
1381
+ },
1382
+ {
1383
+ "epoch": 0.6739679865206403,
1384
+ "grad_norm": 0.9558663964271545,
1385
+ "kl": 0.04088562726974487,
1386
+ "learning_rate": 3.94660365002137e-06,
1387
+ "logps/chosen": -11.501955817727481,
1388
+ "logps/rejected": -14.433762613932291,
1389
+ "loss": 0.5039,
1390
+ "rewards/chosen": 0.004746326628853293,
1391
+ "rewards/margins": 0.0011829081411455188,
1392
+ "rewards/rejected": 0.003563418487707774,
1393
+ "step": 100
1394
+ },
1395
+ {
1396
+ "epoch": 0.6807076663858467,
1397
+ "grad_norm": 0.5541660189628601,
1398
+ "kl": 0.0344005823135376,
1399
+ "learning_rate": 3.808707042884176e-06,
1400
+ "logps/chosen": -13.376131119266633,
1401
+ "logps/rejected": -26.542641379616477,
1402
+ "loss": 0.5008,
1403
+ "rewards/chosen": 0.0056792925442418745,
1404
+ "rewards/margins": 0.003930307243780423,
1405
+ "rewards/rejected": 0.0017489853004614513,
1406
+ "step": 101
1407
+ },
1408
+ {
1409
+ "epoch": 0.6874473462510531,
1410
+ "grad_norm": 0.3918946087360382,
1411
+ "kl": 0.038804955780506134,
1412
+ "learning_rate": 3.6717646444456196e-06,
1413
+ "logps/chosen": -12.274094154094827,
1414
+ "logps/rejected": -14.250191824776786,
1415
+ "loss": 0.5003,
1416
+ "rewards/chosen": 0.005658124541414195,
1417
+ "rewards/margins": 0.0010038192195845359,
1418
+ "rewards/rejected": 0.004654305321829659,
1419
+ "step": 102
1420
+ },
1421
+ {
1422
+ "epoch": 0.6941870261162595,
1423
+ "grad_norm": 0.45218226313591003,
1424
+ "kl": 0.048004306852817535,
1425
+ "learning_rate": 3.5358861436172487e-06,
1426
+ "logps/chosen": -12.376973152160645,
1427
+ "logps/rejected": -15.0717134475708,
1428
+ "loss": 0.5002,
1429
+ "rewards/chosen": 0.007525511085987091,
1430
+ "rewards/margins": 0.00302144605666399,
1431
+ "rewards/rejected": 0.004504065029323101,
1432
+ "step": 103
1433
+ },
1434
+ {
1435
+ "epoch": 0.7009267059814659,
1436
+ "grad_norm": 0.4819565713405609,
1437
+ "kl": 0.024100862443447113,
1438
+ "learning_rate": 3.401180377143774e-06,
1439
+ "logps/chosen": -11.111696079799108,
1440
+ "logps/rejected": -14.007704109981143,
1441
+ "loss": 0.5003,
1442
+ "rewards/chosen": 0.004905121667044503,
1443
+ "rewards/margins": 2.896227860098386e-05,
1444
+ "rewards/rejected": 0.004876159388443519,
1445
+ "step": 104
1446
+ },
1447
+ {
1448
+ "epoch": 0.7076663858466723,
1449
+ "grad_norm": 0.4155801236629486,
1450
+ "kl": 0.03822548687458038,
1451
+ "learning_rate": 3.2677552424263836e-06,
1452
+ "logps/chosen": -11.696998333108835,
1453
+ "logps/rejected": -13.534483119419642,
1454
+ "loss": 0.5012,
1455
+ "rewards/chosen": 0.004182511876369345,
1456
+ "rewards/margins": -0.00044645423372390806,
1457
+ "rewards/rejected": 0.004628966110093253,
1458
+ "step": 105
1459
+ },
1460
+ {
1461
+ "epoch": 0.7144060657118787,
1462
+ "grad_norm": 1.0962884426116943,
1463
+ "kl": 0.038600608706474304,
1464
+ "learning_rate": 3.1357176110984578e-06,
1465
+ "logps/chosen": -11.945585250854492,
1466
+ "logps/rejected": -15.512099266052246,
1467
+ "loss": 0.5007,
1468
+ "rewards/chosen": 0.005235604010522366,
1469
+ "rewards/margins": 0.0009247651323676109,
1470
+ "rewards/rejected": 0.004310838878154755,
1471
+ "step": 106
1472
+ },
1473
+ {
1474
+ "epoch": 0.7211457455770851,
1475
+ "grad_norm": 0.39281150698661804,
1476
+ "kl": 0.03581796586513519,
1477
+ "learning_rate": 3.0051732434229185e-06,
1478
+ "logps/chosen": -12.050497519003379,
1479
+ "logps/rejected": -14.399078369140625,
1480
+ "loss": 0.5003,
1481
+ "rewards/chosen": 0.006019020000019589,
1482
+ "rewards/margins": 0.0028386163237216597,
1483
+ "rewards/rejected": 0.0031804036762979296,
1484
+ "step": 107
1485
+ },
1486
+ {
1487
+ "epoch": 0.7278854254422915,
1488
+ "grad_norm": 0.44166335463523865,
1489
+ "kl": 0.030040442943572998,
1490
+ "learning_rate": 2.8762267035797607e-06,
1491
+ "logps/chosen": -12.115191650390624,
1492
+ "logps/rejected": -27.07490270278033,
1493
+ "loss": 0.4988,
1494
+ "rewards/chosen": 0.005298165480295817,
1495
+ "rewards/margins": 0.013872249804291071,
1496
+ "rewards/rejected": -0.008574084323995253,
1497
+ "step": 108
1498
+ },
1499
+ {
1500
+ "epoch": 0.7346251053074979,
1501
+ "grad_norm": 0.05213148146867752,
1502
+ "kl": 0.033815205097198486,
1503
+ "learning_rate": 2.748981275911633e-06,
1504
+ "logps/chosen": -11.917203630719866,
1505
+ "logps/rejected": -14.394385443793404,
1506
+ "loss": 0.4998,
1507
+ "rewards/chosen": 0.006377638982875007,
1508
+ "rewards/margins": 0.0014188214545212095,
1509
+ "rewards/rejected": 0.004958817528353797,
1510
+ "step": 109
1511
+ },
1512
+ {
1513
+ "epoch": 0.7413647851727043,
1514
+ "grad_norm": 0.5125302672386169,
1515
+ "kl": 0.028670266270637512,
1516
+ "learning_rate": 2.6235388821945497e-06,
1517
+ "logps/chosen": -13.117928466796876,
1518
+ "logps/rejected": -13.176620092147436,
1519
+ "loss": 0.5013,
1520
+ "rewards/chosen": 0.0036279964447021485,
1521
+ "rewards/margins": -0.0016446411303984811,
1522
+ "rewards/rejected": 0.00527263757510063,
1523
+ "step": 110
1524
+ },
1525
+ {
1526
+ "epoch": 0.7481044650379107,
1527
+ "grad_norm": 0.46608638763427734,
1528
+ "kl": 0.027143821120262146,
1529
+ "learning_rate": 2.5000000000000015e-06,
1530
+ "logps/chosen": -12.09948812948691,
1531
+ "logps/rejected": -14.054656982421875,
1532
+ "loss": 0.5006,
1533
+ "rewards/chosen": 0.004925508354161237,
1534
+ "rewards/margins": 0.0006050687130387724,
1535
+ "rewards/rejected": 0.004320439641122465,
1536
+ "step": 111
1537
+ },
1538
+ {
1539
+ "epoch": 0.7548441449031171,
1540
+ "grad_norm": 0.05120411515235901,
1541
+ "kl": 0.04401693493127823,
1542
+ "learning_rate": 2.3784635822138424e-06,
1543
+ "logps/chosen": -11.725931475239415,
1544
+ "logps/rejected": -14.287259188565342,
1545
+ "loss": 0.4996,
1546
+ "rewards/chosen": 0.006879215278933125,
1547
+ "rewards/margins": 0.0028178333536970296,
1548
+ "rewards/rejected": 0.004061381925236095,
1549
+ "step": 112
1550
+ },
1551
+ {
1552
+ "epoch": 0.7548441449031171,
1553
+ "eval_kl": 0.04366951808333397,
1554
+ "eval_logps/chosen": -12.08456680913677,
1555
+ "eval_logps/rejected": -16.13528211022112,
1556
+ "eval_loss": 0.5012484788894653,
1557
+ "eval_rewards/chosen": 0.006296707375701768,
1558
+ "eval_rewards/margins": 0.0034967447587021107,
1559
+ "eval_rewards/rejected": 0.002799962616999657,
1560
+ "eval_runtime": 116.9002,
1561
+ "eval_samples_per_second": 4.277,
1562
+ "eval_steps_per_second": 1.069,
1563
+ "step": 112
1564
+ },
1565
+ {
1566
+ "epoch": 0.7615838247683235,
1567
+ "grad_norm": 0.8294048309326172,
1568
+ "kl": 0.033523499965667725,
1569
+ "learning_rate": 2.2590269777764516e-06,
1570
+ "logps/chosen": -12.994817461286273,
1571
+ "logps/rejected": -20.967634412977432,
1572
+ "loss": 0.5014,
1573
+ "rewards/chosen": 0.004394657643777984,
1574
+ "rewards/margins": 0.007389767864157283,
1575
+ "rewards/rejected": -0.0029951102203792995,
1576
+ "step": 113
1577
+ },
1578
+ {
1579
+ "epoch": 0.7683235046335299,
1580
+ "grad_norm": 0.6650245785713196,
1581
+ "kl": 0.044409215450286865,
1582
+ "learning_rate": 2.141785853707607e-06,
1583
+ "logps/chosen": -13.633831317608173,
1584
+ "logps/rejected": -14.064775390625,
1585
+ "loss": 0.5009,
1586
+ "rewards/chosen": 0.00470560521651537,
1587
+ "rewards/margins": 0.0020437052922371106,
1588
+ "rewards/rejected": 0.002661899924278259,
1589
+ "step": 114
1590
+ },
1591
+ {
1592
+ "epoch": 0.7750631844987363,
1593
+ "grad_norm": 1.0355861186981201,
1594
+ "kl": 0.03707036375999451,
1595
+ "learning_rate": 2.0268341184785674e-06,
1596
+ "logps/chosen": -12.006687837488512,
1597
+ "logps/rejected": -12.987567138671874,
1598
+ "loss": 0.5002,
1599
+ "rewards/chosen": 0.005552893614067751,
1600
+ "rewards/margins": 0.001937851295167325,
1601
+ "rewards/rejected": 0.003615042318900426,
1602
+ "step": 115
1603
+ },
1604
+ {
1605
+ "epoch": 0.7818028643639428,
1606
+ "grad_norm": 0.3267318606376648,
1607
+ "kl": 0.03743256628513336,
1608
+ "learning_rate": 1.9142638467927254e-06,
1609
+ "logps/chosen": -12.60701437557445,
1610
+ "logps/rejected": -14.109915161132813,
1611
+ "loss": 0.4998,
1612
+ "rewards/chosen": 0.005473397233906914,
1613
+ "rewards/margins": 0.001956190723998874,
1614
+ "rewards/rejected": 0.0035172065099080404,
1615
+ "step": 116
1616
+ },
1617
+ {
1618
+ "epoch": 0.7885425442291492,
1619
+ "grad_norm": 1.15928316116333,
1620
+ "kl": 0.035497263073921204,
1621
+ "learning_rate": 1.8041652058350768e-06,
1622
+ "logps/chosen": -12.61219596862793,
1623
+ "logps/rejected": -14.834460258483887,
1624
+ "loss": 0.5042,
1625
+ "rewards/chosen": 0.004248159006237984,
1626
+ "rewards/margins": -0.0009399871341884136,
1627
+ "rewards/rejected": 0.005188146140426397,
1628
+ "step": 117
1629
+ },
1630
+ {
1631
+ "epoch": 0.7952822240943556,
1632
+ "grad_norm": 0.9165633916854858,
1633
+ "kl": 0.0436873733997345,
1634
+ "learning_rate": 1.6966263830495939e-06,
1635
+ "logps/chosen": -11.850751989028034,
1636
+ "logps/rejected": -21.343756103515624,
1637
+ "loss": 0.5008,
1638
+ "rewards/chosen": 0.00493351457750096,
1639
+ "rewards/margins": 0.00906530259286656,
1640
+ "rewards/rejected": -0.0041317880153656,
1641
+ "step": 118
1642
+ },
1643
+ {
1644
+ "epoch": 0.802021903959562,
1645
+ "grad_norm": 0.3139224648475647,
1646
+ "kl": 0.041897207498550415,
1647
+ "learning_rate": 1.5917335155023368e-06,
1648
+ "logps/chosen": -11.65659688313802,
1649
+ "logps/rejected": -15.448626349954043,
1650
+ "loss": 0.5,
1651
+ "rewards/chosen": 0.005463708937168121,
1652
+ "rewards/margins": 0.002067950367927551,
1653
+ "rewards/rejected": 0.00339575856924057,
1654
+ "step": 119
1655
+ },
1656
+ {
1657
+ "epoch": 0.8087615838247684,
1658
+ "grad_norm": 0.1552770733833313,
1659
+ "kl": 0.03437415510416031,
1660
+ "learning_rate": 1.4895706208868876e-06,
1661
+ "logps/chosen": -11.553650684845753,
1662
+ "logps/rejected": -13.126873779296876,
1663
+ "loss": 0.4998,
1664
+ "rewards/chosen": 0.0057815661032994585,
1665
+ "rewards/margins": 0.0021546780069669085,
1666
+ "rewards/rejected": 0.00362688809633255,
1667
+ "step": 120
1668
+ },
1669
+ {
1670
+ "epoch": 0.8155012636899748,
1671
+ "grad_norm": 0.5283660292625427,
1672
+ "kl": 0.038322046399116516,
1673
+ "learning_rate": 1.390219530227378e-06,
1674
+ "logps/chosen": -12.621611595153809,
1675
+ "logps/rejected": -14.466289520263672,
1676
+ "loss": 0.5009,
1677
+ "rewards/chosen": 0.005145063623785973,
1678
+ "rewards/margins": 0.0015712629538029432,
1679
+ "rewards/rejected": 0.0035738006699830294,
1680
+ "step": 121
1681
+ },
1682
+ {
1683
+ "epoch": 0.8222409435551812,
1684
+ "grad_norm": 0.976498007774353,
1685
+ "kl": 0.03444386273622513,
1686
+ "learning_rate": 1.2937598223330006e-06,
1687
+ "logps/chosen": -12.92395662006579,
1688
+ "logps/rejected": -22.93274864783654,
1689
+ "loss": 0.5004,
1690
+ "rewards/chosen": 0.005029119943317614,
1691
+ "rewards/margins": 0.01761611934132904,
1692
+ "rewards/rejected": -0.012586999398011427,
1693
+ "step": 122
1694
+ },
1695
+ {
1696
+ "epoch": 0.8289806234203876,
1697
+ "grad_norm": 0.7180256843566895,
1698
+ "kl": 0.03909187763929367,
1699
+ "learning_rate": 1.2002687600565138e-06,
1700
+ "logps/chosen": -13.27630615234375,
1701
+ "logps/rejected": -14.22623291015625,
1702
+ "loss": 0.5006,
1703
+ "rewards/chosen": 0.005644300842986387,
1704
+ "rewards/margins": 0.002153261268840116,
1705
+ "rewards/rejected": 0.003491039574146271,
1706
+ "step": 123
1707
+ },
1708
+ {
1709
+ "epoch": 0.8357203032855939,
1710
+ "grad_norm": 0.6930535435676575,
1711
+ "kl": 0.04802556335926056,
1712
+ "learning_rate": 1.1098212284078037e-06,
1713
+ "logps/chosen": -12.030011407260236,
1714
+ "logps/rejected": -19.90482352120536,
1715
+ "loss": 0.5013,
1716
+ "rewards/chosen": 0.004783567683450107,
1717
+ "rewards/margins": -0.001280770234286491,
1718
+ "rewards/rejected": 0.006064337917736598,
1719
+ "step": 124
1720
+ },
1721
+ {
1722
+ "epoch": 0.8424599831508003,
1723
+ "grad_norm": 0.26308876276016235,
1724
+ "kl": 0.034075237810611725,
1725
+ "learning_rate": 1.0224896745720513e-06,
1726
+ "logps/chosen": -12.563464749243952,
1727
+ "logps/rejected": -13.066956491181344,
1728
+ "loss": 0.4999,
1729
+ "rewards/chosen": 0.006204346975972576,
1730
+ "rewards/margins": 0.0027266234407093987,
1731
+ "rewards/rejected": 0.003477723535263177,
1732
+ "step": 125
1733
+ },
1734
+ {
1735
+ "epoch": 0.8491996630160067,
1736
+ "grad_norm": 0.05575637146830559,
1737
+ "kl": 0.03818386048078537,
1738
+ "learning_rate": 9.383440498805712e-07,
1739
+ "logps/chosen": -12.770438561072716,
1740
+ "logps/rejected": -13.97016023334704,
1741
+ "loss": 0.4997,
1742
+ "rewards/chosen": 0.005511786502141219,
1743
+ "rewards/margins": 0.0017519345288334585,
1744
+ "rewards/rejected": 0.00375985197330776,
1745
+ "step": 126
1746
+ },
1747
+ {
1748
+ "epoch": 0.8559393428812131,
1749
+ "grad_norm": 0.051352065056562424,
1750
+ "kl": 0.03995239734649658,
1751
+ "learning_rate": 8.574517537807897e-07,
1752
+ "logps/chosen": -10.37672831217448,
1753
+ "logps/rejected": -15.862617043887868,
1754
+ "loss": 0.4999,
1755
+ "rewards/chosen": 0.005997484922409058,
1756
+ "rewards/margins": 0.0014087323756778944,
1757
+ "rewards/rejected": 0.004588752546731164,
1758
+ "step": 127
1759
+ },
1760
+ {
1761
+ "epoch": 0.8626790227464195,
1762
+ "grad_norm": 3.6663429737091064,
1763
+ "kl": 0.02988211065530777,
1764
+ "learning_rate": 7.798775798502484e-07,
1765
+ "logps/chosen": -12.772437201605904,
1766
+ "logps/rejected": -19.50521438186233,
1767
+ "loss": 0.5077,
1768
+ "rewards/chosen": 0.003772258758544922,
1769
+ "rewards/margins": 0.0015482741433220939,
1770
+ "rewards/rejected": 0.002223984615222828,
1771
+ "step": 128
1772
+ },
1773
+ {
1774
+ "epoch": 0.8626790227464195,
1775
+ "eval_kl": 0.043279923498630524,
1776
+ "eval_logps/chosen": -12.084393830577355,
1777
+ "eval_logps/rejected": -16.137369204083935,
1778
+ "eval_loss": 0.5004793405532837,
1779
+ "eval_rewards/chosen": 0.006313999672107098,
1780
+ "eval_rewards/margins": 0.003722760686253593,
1781
+ "eval_rewards/rejected": 0.002591238985853505,
1782
+ "eval_runtime": 117.1667,
1783
+ "eval_samples_per_second": 4.267,
1784
+ "eval_steps_per_second": 1.067,
1785
+ "step": 128
1786
+ },
1787
+ {
1788
+ "epoch": 0.8694187026116259,
1789
+ "grad_norm": 0.5401572585105896,
1790
+ "kl": 0.032129742205142975,
1791
+ "learning_rate": 7.056836638978698e-07,
1792
+ "logps/chosen": -13.479878425598145,
1793
+ "logps/rejected": -13.713187217712402,
1794
+ "loss": 0.5014,
1795
+ "rewards/chosen": 0.004872842226177454,
1796
+ "rewards/margins": 0.0015969944652169943,
1797
+ "rewards/rejected": 0.0032758477609604597,
1798
+ "step": 129
1799
+ },
1800
+ {
1801
+ "epoch": 0.8761583824768323,
1802
+ "grad_norm": 0.4993322193622589,
1803
+ "kl": 0.02726106345653534,
1804
+ "learning_rate": 6.349294341940593e-07,
1805
+ "logps/chosen": -11.89748062626008,
1806
+ "logps/rejected": -14.18370194868608,
1807
+ "loss": 0.5008,
1808
+ "rewards/chosen": 0.005981699112922915,
1809
+ "rewards/margins": 0.0013335568726587158,
1810
+ "rewards/rejected": 0.004648142240264199,
1811
+ "step": 130
1812
+ },
1813
+ {
1814
+ "epoch": 0.8828980623420387,
1815
+ "grad_norm": 0.9090181589126587,
1816
+ "kl": 0.04542076587677002,
1817
+ "learning_rate": 5.676715638695063e-07,
1818
+ "logps/chosen": -11.69959716796875,
1819
+ "logps/rejected": -14.096754402949893,
1820
+ "loss": 0.5006,
1821
+ "rewards/chosen": 0.005059713976723807,
1822
+ "rewards/margins": 0.0012234999481680357,
1823
+ "rewards/rejected": 0.0038362140285557716,
1824
+ "step": 131
1825
+ },
1826
+ {
1827
+ "epoch": 0.8896377422072451,
1828
+ "grad_norm": 0.5459097027778625,
1829
+ "kl": 0.04598844051361084,
1830
+ "learning_rate": 5.039639255208156e-07,
1831
+ "logps/chosen": -10.5826806640625,
1832
+ "logps/rejected": -14.339314778645834,
1833
+ "loss": 0.5004,
1834
+ "rewards/chosen": 0.005033199787139893,
1835
+ "rewards/margins": 0.0015520608119475535,
1836
+ "rewards/rejected": 0.003481138975192339,
1837
+ "step": 132
1838
+ },
1839
+ {
1840
+ "epoch": 0.8963774220724515,
1841
+ "grad_norm": 0.36847659945487976,
1842
+ "kl": 0.04135167598724365,
1843
+ "learning_rate": 4.43857548059321e-07,
1844
+ "logps/chosen": -12.215231759207589,
1845
+ "logps/rejected": -14.881442365975216,
1846
+ "loss": 0.5009,
1847
+ "rewards/chosen": 0.005727479713303702,
1848
+ "rewards/margins": -0.008206131613900509,
1849
+ "rewards/rejected": 0.01393361132720421,
1850
+ "step": 133
1851
+ },
1852
+ {
1853
+ "epoch": 0.9031171019376579,
1854
+ "grad_norm": 0.46339669823646545,
1855
+ "kl": 0.040303200483322144,
1856
+ "learning_rate": 3.87400575837657e-07,
1857
+ "logps/chosen": -12.203319549560547,
1858
+ "logps/rejected": -15.03320026397705,
1859
+ "loss": 0.5005,
1860
+ "rewards/chosen": 0.004549141973257065,
1861
+ "rewards/margins": -2.8897076845169067e-05,
1862
+ "rewards/rejected": 0.004578039050102234,
1863
+ "step": 134
1864
+ },
1865
+ {
1866
+ "epoch": 0.9098567818028643,
1867
+ "grad_norm": 0.5133540034294128,
1868
+ "kl": 0.04136868566274643,
1869
+ "learning_rate": 3.346382300868134e-07,
1870
+ "logps/chosen": -10.265506320529514,
1871
+ "logps/rejected": -15.82147216796875,
1872
+ "loss": 0.501,
1873
+ "rewards/chosen": 0.005173130167855157,
1874
+ "rewards/margins": 0.001688669617469604,
1875
+ "rewards/rejected": 0.0034844605503855527,
1876
+ "step": 135
1877
+ },
1878
+ {
1879
+ "epoch": 0.9165964616680707,
1880
+ "grad_norm": 0.220728799700737,
1881
+ "kl": 0.04973362386226654,
1882
+ "learning_rate": 2.85612772694579e-07,
1883
+ "logps/chosen": -12.548672380118534,
1884
+ "logps/rejected": -15.221932547433036,
1885
+ "loss": 0.5,
1886
+ "rewards/chosen": 0.005709037184715271,
1887
+ "rewards/margins": 0.001126085434641157,
1888
+ "rewards/rejected": 0.004582951750074114,
1889
+ "step": 136
1890
+ },
1891
+ {
1892
+ "epoch": 0.9233361415332771,
1893
+ "grad_norm": 0.601239800453186,
1894
+ "kl": 0.042025819420814514,
1895
+ "learning_rate": 2.403634723543674e-07,
1896
+ "logps/chosen": -12.426138136121962,
1897
+ "logps/rejected": -14.724080766950335,
1898
+ "loss": 0.5028,
1899
+ "rewards/chosen": 0.005159664071268505,
1900
+ "rewards/margins": -0.003907858556698239,
1901
+ "rewards/rejected": 0.009067522627966744,
1902
+ "step": 137
1903
+ },
1904
+ {
1905
+ "epoch": 0.9300758213984835,
1906
+ "grad_norm": 0.35333240032196045,
1907
+ "kl": 0.042685166001319885,
1908
+ "learning_rate": 1.989265731115525e-07,
1909
+ "logps/chosen": -12.459028089368665,
1910
+ "logps/rejected": -15.966356065538195,
1911
+ "loss": 0.5002,
1912
+ "rewards/chosen": 0.005651528368125091,
1913
+ "rewards/margins": 0.0006864719443373732,
1914
+ "rewards/rejected": 0.004965056423787717,
1915
+ "step": 138
1916
+ },
1917
+ {
1918
+ "epoch": 0.93681550126369,
1919
+ "grad_norm": 0.5635568499565125,
1920
+ "kl": 0.03784912824630737,
1921
+ "learning_rate": 1.6133526533250566e-07,
1922
+ "logps/chosen": -13.104137073863637,
1923
+ "logps/rejected": -27.47379426033266,
1924
+ "loss": 0.4988,
1925
+ "rewards/chosen": 0.004985654895955866,
1926
+ "rewards/margins": 0.015799503522184937,
1927
+ "rewards/rejected": -0.010813848626229071,
1928
+ "step": 139
1929
+ },
1930
+ {
1931
+ "epoch": 0.9435551811288964,
1932
+ "grad_norm": 0.7115698456764221,
1933
+ "kl": 0.03980047255754471,
1934
+ "learning_rate": 1.2761965911958385e-07,
1935
+ "logps/chosen": -11.41650390625,
1936
+ "logps/rejected": -14.686680385044642,
1937
+ "loss": 0.5017,
1938
+ "rewards/chosen": 0.005472733184348705,
1939
+ "rewards/margins": 0.0009131208392208733,
1940
+ "rewards/rejected": 0.004559612345127832,
1941
+ "step": 140
1942
+ },
1943
+ {
1944
+ "epoch": 0.9502948609941028,
1945
+ "grad_norm": 0.1111406609416008,
1946
+ "kl": 0.048449933528900146,
1947
+ "learning_rate": 9.780676019336632e-08,
1948
+ "logps/chosen": -12.19880845811632,
1949
+ "logps/rejected": -14.06743724926098,
1950
+ "loss": 0.4998,
1951
+ "rewards/chosen": 0.006176349189546373,
1952
+ "rewards/margins": 0.0013241183023910974,
1953
+ "rewards/rejected": 0.004852230887155275,
1954
+ "step": 141
1955
+ },
1956
+ {
1957
+ "epoch": 0.9570345408593092,
1958
+ "grad_norm": 0.7242898941040039,
1959
+ "kl": 0.0280449241399765,
1960
+ "learning_rate": 7.192044826145772e-08,
1961
+ "logps/chosen": -12.05575180053711,
1962
+ "logps/rejected": -20.671449661254883,
1963
+ "loss": 0.4999,
1964
+ "rewards/chosen": 0.0058912248350679874,
1965
+ "rewards/margins": 0.011503569316118956,
1966
+ "rewards/rejected": -0.005612344481050968,
1967
+ "step": 142
1968
+ },
1969
+ {
1970
+ "epoch": 0.9637742207245156,
1971
+ "grad_norm": 0.7172530889511108,
1972
+ "kl": 0.0630464106798172,
1973
+ "learning_rate": 4.998145789118114e-08,
1974
+ "logps/chosen": -11.753687241498161,
1975
+ "logps/rejected": -16.088516235351562,
1976
+ "loss": 0.5005,
1977
+ "rewards/chosen": 0.004976525026209214,
1978
+ "rewards/margins": -0.0003784421904414305,
1979
+ "rewards/rejected": 0.005354967216650645,
1980
+ "step": 143
1981
+ },
1982
+ {
1983
+ "epoch": 0.970513900589722,
1984
+ "grad_norm": 0.5185176134109497,
1985
+ "kl": 0.031040333211421967,
1986
+ "learning_rate": 3.2007361901485455e-08,
1987
+ "logps/chosen": -12.094314575195312,
1988
+ "logps/rejected": -13.739120483398438,
1989
+ "loss": 0.5012,
1990
+ "rewards/chosen": 0.006150585495763355,
1991
+ "rewards/margins": 0.001710880577327713,
1992
+ "rewards/rejected": 0.004439704918435642,
1993
+ "step": 144
1994
+ },
1995
+ {
1996
+ "epoch": 0.970513900589722,
1997
+ "eval_kl": 0.04314365237951279,
1998
+ "eval_logps/chosen": -12.083704105941704,
1999
+ "eval_logps/rejected": -16.140120854016246,
2000
+ "eval_loss": 0.5004004240036011,
2001
+ "eval_rewards/chosen": 0.006382944337990252,
2002
+ "eval_rewards/margins": 0.004066789610123318,
2003
+ "eval_rewards/rejected": 0.0023161547278669338,
2004
+ "eval_runtime": 116.8675,
2005
+ "eval_samples_per_second": 4.278,
2006
+ "eval_steps_per_second": 1.07,
2007
+ "step": 144
2008
+ },
2009
+ {
2010
+ "epoch": 0.9772535804549284,
2011
+ "grad_norm": 0.25673621892929077,
2012
+ "kl": 0.0404113307595253,
2013
+ "learning_rate": 1.8012557287367394e-08,
2014
+ "logps/chosen": -12.68321533203125,
2015
+ "logps/rejected": -21.220464369829962,
2016
+ "loss": 0.4988,
2017
+ "rewards/chosen": 0.005415428181489309,
2018
+ "rewards/margins": 0.009387091211244172,
2019
+ "rewards/rejected": -0.003971663029754863,
2020
+ "step": 145
2021
+ },
2022
+ {
2023
+ "epoch": 0.9839932603201348,
2024
+ "grad_norm": 0.4361318349838257,
2025
+ "kl": 0.031056255102157593,
2026
+ "learning_rate": 8.008253688084888e-09,
2027
+ "logps/chosen": -12.425528861380911,
2028
+ "logps/rejected": -15.46146873191551,
2029
+ "loss": 0.5013,
2030
+ "rewards/chosen": 0.0055796177806081,
2031
+ "rewards/margins": 0.00028554923660881713,
2032
+ "rewards/rejected": 0.005294068543999283,
2033
+ "step": 146
2034
+ },
2035
+ {
2036
+ "epoch": 0.9907329401853412,
2037
+ "grad_norm": 0.9861050844192505,
2038
+ "kl": 0.03052590787410736,
2039
+ "learning_rate": 2.002464408392135e-09,
2040
+ "logps/chosen": -12.423692272555444,
2041
+ "logps/rejected": -12.56340350526752,
2042
+ "loss": 0.5021,
2043
+ "rewards/chosen": 0.0039527707042232635,
2044
+ "rewards/margins": -0.0018114563176009667,
2045
+ "rewards/rejected": 0.00576422702182423,
2046
+ "step": 147
2047
+ },
2048
+ {
2049
+ "epoch": 0.9974726200505476,
2050
+ "grad_norm": 0.8566290736198425,
2051
+ "kl": 0.03132675588130951,
2052
+ "learning_rate": 0.0,
2053
+ "logps/chosen": -12.614923292590726,
2054
+ "logps/rejected": -14.566915801077178,
2055
+ "loss": 0.5016,
2056
+ "rewards/chosen": 0.00556570339587427,
2057
+ "rewards/margins": 0.0018311565412337718,
2058
+ "rewards/rejected": 0.003734546854640498,
2059
+ "step": 148
2060
+ },
2061
+ {
2062
+ "epoch": 0.9974726200505476,
2063
+ "step": 148,
2064
+ "total_flos": 2.1537474524636774e+17,
2065
+ "train_loss": 0.5015917984214989,
2066
+ "train_runtime": 4835.0561,
2067
+ "train_samples_per_second": 1.964,
2068
+ "train_steps_per_second": 0.031
2069
+ }
2070
+ ],
2071
+ "logging_steps": 1,
2072
+ "max_steps": 148,
2073
+ "num_input_tokens_seen": 0,
2074
+ "num_train_epochs": 1,
2075
+ "save_steps": 500,
2076
+ "stateful_callbacks": {
2077
+ "TrainerControl": {
2078
+ "args": {
2079
+ "should_epoch_stop": false,
2080
+ "should_evaluate": false,
2081
+ "should_log": false,
2082
+ "should_save": true,
2083
+ "should_training_stop": true
2084
+ },
2085
+ "attributes": {}
2086
+ }
2087
+ },
2088
+ "total_flos": 2.1537474524636774e+17,
2089
+ "train_batch_size": 2,
2090
+ "trial_name": null,
2091
+ "trial_params": null
2092
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be0fdd1ce44e1386ee04dc9c296edce27ecf9908954ea5e43d3d739dd28c919c
3
+ size 6712
training_eval_loss.png ADDED
training_loss.png ADDED