Farouk
commited on
Commit
·
f8b60c0
1
Parent(s):
0eb810b
Training in progress, step 6200
Browse files- adapter_config.json +4 -4
- adapter_model.bin +1 -1
- all_results.json +8 -8
- checkpoint-4200/adapter_model/adapter_model/README.md +24 -0
- checkpoint-4200/adapter_model/adapter_model/adapter_model.bin +1 -1
- checkpoint-6200/README.md +20 -0
- checkpoint-6200/adapter_config.json +26 -0
- checkpoint-6200/adapter_model.bin +3 -0
- checkpoint-6200/added_tokens.json +3 -0
- checkpoint-6200/optimizer.pt +3 -0
- checkpoint-6200/rng_state.pth +3 -0
- checkpoint-6200/scheduler.pt +3 -0
- checkpoint-6200/special_tokens_map.json +6 -0
- checkpoint-6200/tokenizer.model +3 -0
- checkpoint-6200/tokenizer_config.json +35 -0
- checkpoint-6200/trainer_state.json +0 -0
- checkpoint-6200/training_args.bin +3 -0
- eval_results.json +4 -4
- metrics.json +1 -1
- train_results.json +5 -5
- trainer_state.json +3666 -3
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 64,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"o_proj",
|
18 |
-
"k_proj",
|
19 |
-
"down_proj",
|
20 |
"gate_proj",
|
|
|
|
|
21 |
"up_proj",
|
22 |
"v_proj",
|
23 |
-
"
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 64,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"gate_proj",
|
18 |
+
"down_proj",
|
19 |
+
"q_proj",
|
20 |
"up_proj",
|
21 |
"v_proj",
|
22 |
+
"k_proj",
|
23 |
+
"o_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 871609293
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89e3e86e88a2a473616d28f379b3735697c068cbf1c5d7c8fe7b56148a37a0af
|
3 |
size 871609293
|
all_results.json
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
{
|
2 |
-
"epoch": 0.
|
3 |
"eval_loss": 6.335043907165527,
|
4 |
-
"eval_runtime": 21.
|
5 |
-
"eval_samples_per_second": 2.
|
6 |
-
"eval_steps_per_second": 1.
|
7 |
-
"train_loss": 0.
|
8 |
-
"train_runtime":
|
9 |
-
"train_samples_per_second": 17.
|
10 |
-
"train_steps_per_second": 17.
|
11 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 0.05,
|
3 |
"eval_loss": 6.335043907165527,
|
4 |
+
"eval_runtime": 21.7341,
|
5 |
+
"eval_samples_per_second": 2.301,
|
6 |
+
"eval_steps_per_second": 1.15,
|
7 |
+
"train_loss": 0.5874443841576577,
|
8 |
+
"train_runtime": 1725.6374,
|
9 |
+
"train_samples_per_second": 17.385,
|
10 |
+
"train_steps_per_second": 17.385
|
11 |
}
|
checkpoint-4200/adapter_model/adapter_model/README.md
CHANGED
@@ -114,6 +114,28 @@ The following `bitsandbytes` quantization config was used during training:
|
|
114 |
- bnb_4bit_use_double_quant: True
|
115 |
- bnb_4bit_compute_dtype: bfloat16
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
The following `bitsandbytes` quantization config was used during training:
|
118 |
- load_in_8bit: False
|
119 |
- load_in_4bit: True
|
@@ -136,5 +158,7 @@ The following `bitsandbytes` quantization config was used during training:
|
|
136 |
- PEFT 0.4.0
|
137 |
- PEFT 0.4.0
|
138 |
- PEFT 0.4.0
|
|
|
|
|
139 |
|
140 |
- PEFT 0.4.0
|
|
|
114 |
- bnb_4bit_use_double_quant: True
|
115 |
- bnb_4bit_compute_dtype: bfloat16
|
116 |
|
117 |
+
The following `bitsandbytes` quantization config was used during training:
|
118 |
+
- load_in_8bit: False
|
119 |
+
- load_in_4bit: True
|
120 |
+
- llm_int8_threshold: 6.0
|
121 |
+
- llm_int8_skip_modules: None
|
122 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
123 |
+
- llm_int8_has_fp16_weight: False
|
124 |
+
- bnb_4bit_quant_type: nf4
|
125 |
+
- bnb_4bit_use_double_quant: True
|
126 |
+
- bnb_4bit_compute_dtype: bfloat16
|
127 |
+
|
128 |
+
The following `bitsandbytes` quantization config was used during training:
|
129 |
+
- load_in_8bit: False
|
130 |
+
- load_in_4bit: True
|
131 |
+
- llm_int8_threshold: 6.0
|
132 |
+
- llm_int8_skip_modules: None
|
133 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
134 |
+
- llm_int8_has_fp16_weight: False
|
135 |
+
- bnb_4bit_quant_type: nf4
|
136 |
+
- bnb_4bit_use_double_quant: True
|
137 |
+
- bnb_4bit_compute_dtype: bfloat16
|
138 |
+
|
139 |
The following `bitsandbytes` quantization config was used during training:
|
140 |
- load_in_8bit: False
|
141 |
- load_in_4bit: True
|
|
|
158 |
- PEFT 0.4.0
|
159 |
- PEFT 0.4.0
|
160 |
- PEFT 0.4.0
|
161 |
+
- PEFT 0.4.0
|
162 |
+
- PEFT 0.4.0
|
163 |
|
164 |
- PEFT 0.4.0
|
checkpoint-4200/adapter_model/adapter_model/adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 871609293
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff18c40f9b3c9fb20f1c95d4dff151244eba09eee79ae11c6121cc23181c2442
|
3 |
size 871609293
|
checkpoint-6200/README.md
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
---
|
4 |
+
## Training procedure
|
5 |
+
|
6 |
+
|
7 |
+
The following `bitsandbytes` quantization config was used during training:
|
8 |
+
- load_in_8bit: False
|
9 |
+
- load_in_4bit: True
|
10 |
+
- llm_int8_threshold: 6.0
|
11 |
+
- llm_int8_skip_modules: None
|
12 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
13 |
+
- llm_int8_has_fp16_weight: False
|
14 |
+
- bnb_4bit_quant_type: nf4
|
15 |
+
- bnb_4bit_use_double_quant: True
|
16 |
+
- bnb_4bit_compute_dtype: bfloat16
|
17 |
+
### Framework versions
|
18 |
+
|
19 |
+
|
20 |
+
- PEFT 0.4.0
|
checkpoint-6200/adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"auto_mapping": null,
|
3 |
+
"base_model_name_or_path": "codellama/CodeLlama-34b-Python-hf",
|
4 |
+
"bias": "none",
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"layers_pattern": null,
|
9 |
+
"layers_to_transform": null,
|
10 |
+
"lora_alpha": 16.0,
|
11 |
+
"lora_dropout": 0.1,
|
12 |
+
"modules_to_save": null,
|
13 |
+
"peft_type": "LORA",
|
14 |
+
"r": 64,
|
15 |
+
"revision": null,
|
16 |
+
"target_modules": [
|
17 |
+
"gate_proj",
|
18 |
+
"down_proj",
|
19 |
+
"q_proj",
|
20 |
+
"up_proj",
|
21 |
+
"v_proj",
|
22 |
+
"k_proj",
|
23 |
+
"o_proj"
|
24 |
+
],
|
25 |
+
"task_type": "CAUSAL_LM"
|
26 |
+
}
|
checkpoint-6200/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89e3e86e88a2a473616d28f379b3735697c068cbf1c5d7c8fe7b56148a37a0af
|
3 |
+
size 871609293
|
checkpoint-6200/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[PAD]": 32000
|
3 |
+
}
|
checkpoint-6200/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f915a3be0537ed39b74e6b133c9652eb8040c1f61e027bd380f53fc1de4740e
|
3 |
+
size 873872799
|
checkpoint-6200/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d56947d85c4236c819c7e5ba1f3020a0c401b4caa051580cac172d0a50c72119
|
3 |
+
size 14511
|
checkpoint-6200/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81248501833af563175f43c1d681185643b8411cee1fb1e631b8687c465eb2e3
|
3 |
+
size 627
|
checkpoint-6200/special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"eos_token": "</s>",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"unk_token": "<unk>"
|
6 |
+
}
|
checkpoint-6200/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
checkpoint-6200/tokenizer_config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"bos_token": {
|
5 |
+
"__type": "AddedToken",
|
6 |
+
"content": "<s>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": true,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"clean_up_tokenization_spaces": false,
|
13 |
+
"eos_token": {
|
14 |
+
"__type": "AddedToken",
|
15 |
+
"content": "</s>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": true,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false
|
20 |
+
},
|
21 |
+
"legacy": null,
|
22 |
+
"model_max_length": 1000000000000000019884624838656,
|
23 |
+
"pad_token": null,
|
24 |
+
"padding_side": "right",
|
25 |
+
"sp_model_kwargs": {},
|
26 |
+
"tokenizer_class": "LlamaTokenizer",
|
27 |
+
"unk_token": {
|
28 |
+
"__type": "AddedToken",
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false
|
34 |
+
}
|
35 |
+
}
|
checkpoint-6200/trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-6200/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85783faab59f5f6d8bcf691e35bb86cff435e22f3fa9169bf4e56c0239c8d7e4
|
3 |
+
size 6011
|
eval_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch": 0.
|
3 |
"eval_loss": 6.335043907165527,
|
4 |
-
"eval_runtime": 21.
|
5 |
-
"eval_samples_per_second": 2.
|
6 |
-
"eval_steps_per_second": 1.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 0.05,
|
3 |
"eval_loss": 6.335043907165527,
|
4 |
+
"eval_runtime": 21.7341,
|
5 |
+
"eval_samples_per_second": 2.301,
|
6 |
+
"eval_steps_per_second": 1.15
|
7 |
}
|
metrics.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"run_name": "codellama34b_unnatural", "train_runtime":
|
|
|
1 |
+
{"run_name": "codellama34b_unnatural", "train_runtime": 1725.6374, "train_samples_per_second": 17.385, "train_steps_per_second": 17.385, "train_loss": 0.5874443841576577, "epoch": 0.05, "eval_loss": 6.335043907165527, "eval_runtime": 21.7341, "eval_samples_per_second": 2.301, "eval_steps_per_second": 1.15}
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch": 0.
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second": 17.
|
6 |
-
"train_steps_per_second": 17.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 0.05,
|
3 |
+
"train_loss": 0.5874443841576577,
|
4 |
+
"train_runtime": 1725.6374,
|
5 |
+
"train_samples_per_second": 17.385,
|
6 |
+
"train_steps_per_second": 17.385
|
7 |
}
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": 6.335043907165527,
|
3 |
"best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_unnatural-instructions_standardized/checkpoint-4200",
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -32928,11 +32928,3674 @@
|
|
32928 |
"train_runtime": 1748.3273,
|
32929 |
"train_samples_per_second": 17.159,
|
32930 |
"train_steps_per_second": 17.159
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32931 |
}
|
32932 |
],
|
32933 |
"max_steps": 30000,
|
32934 |
"num_train_epochs": 1,
|
32935 |
-
"total_flos":
|
32936 |
"trial_name": null,
|
32937 |
"trial_params": null
|
32938 |
}
|
|
|
1 |
{
|
2 |
"best_metric": 6.335043907165527,
|
3 |
"best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_unnatural-instructions_standardized/checkpoint-4200",
|
4 |
+
"epoch": 0.045833015048506606,
|
5 |
+
"global_step": 6000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
32928 |
"train_runtime": 1748.3273,
|
32929 |
"train_samples_per_second": 17.159,
|
32930 |
"train_steps_per_second": 17.159
|
32931 |
+
},
|
32932 |
+
{
|
32933 |
+
"epoch": 0.04,
|
32934 |
+
"learning_rate": 0.0004,
|
32935 |
+
"loss": 8.7161,
|
32936 |
+
"step": 5401
|
32937 |
+
},
|
32938 |
+
{
|
32939 |
+
"epoch": 0.04,
|
32940 |
+
"learning_rate": 0.0004,
|
32941 |
+
"loss": 7.6554,
|
32942 |
+
"step": 5402
|
32943 |
+
},
|
32944 |
+
{
|
32945 |
+
"epoch": 0.04,
|
32946 |
+
"learning_rate": 0.0004,
|
32947 |
+
"loss": 6.7865,
|
32948 |
+
"step": 5403
|
32949 |
+
},
|
32950 |
+
{
|
32951 |
+
"epoch": 0.04,
|
32952 |
+
"learning_rate": 0.0004,
|
32953 |
+
"loss": 8.4439,
|
32954 |
+
"step": 5404
|
32955 |
+
},
|
32956 |
+
{
|
32957 |
+
"epoch": 0.04,
|
32958 |
+
"learning_rate": 0.0004,
|
32959 |
+
"loss": 7.0393,
|
32960 |
+
"step": 5405
|
32961 |
+
},
|
32962 |
+
{
|
32963 |
+
"epoch": 0.04,
|
32964 |
+
"learning_rate": 0.0004,
|
32965 |
+
"loss": 7.3437,
|
32966 |
+
"step": 5406
|
32967 |
+
},
|
32968 |
+
{
|
32969 |
+
"epoch": 0.04,
|
32970 |
+
"learning_rate": 0.0004,
|
32971 |
+
"loss": 8.6367,
|
32972 |
+
"step": 5407
|
32973 |
+
},
|
32974 |
+
{
|
32975 |
+
"epoch": 0.04,
|
32976 |
+
"learning_rate": 0.0004,
|
32977 |
+
"loss": 7.3527,
|
32978 |
+
"step": 5408
|
32979 |
+
},
|
32980 |
+
{
|
32981 |
+
"epoch": 0.04,
|
32982 |
+
"learning_rate": 0.0004,
|
32983 |
+
"loss": 7.4897,
|
32984 |
+
"step": 5409
|
32985 |
+
},
|
32986 |
+
{
|
32987 |
+
"epoch": 0.04,
|
32988 |
+
"learning_rate": 0.0004,
|
32989 |
+
"loss": 6.9515,
|
32990 |
+
"step": 5410
|
32991 |
+
},
|
32992 |
+
{
|
32993 |
+
"epoch": 0.04,
|
32994 |
+
"learning_rate": 0.0004,
|
32995 |
+
"loss": 6.6737,
|
32996 |
+
"step": 5411
|
32997 |
+
},
|
32998 |
+
{
|
32999 |
+
"epoch": 0.04,
|
33000 |
+
"learning_rate": 0.0004,
|
33001 |
+
"loss": 4.9802,
|
33002 |
+
"step": 5412
|
33003 |
+
},
|
33004 |
+
{
|
33005 |
+
"epoch": 0.04,
|
33006 |
+
"learning_rate": 0.0004,
|
33007 |
+
"loss": 6.9681,
|
33008 |
+
"step": 5413
|
33009 |
+
},
|
33010 |
+
{
|
33011 |
+
"epoch": 0.04,
|
33012 |
+
"learning_rate": 0.0004,
|
33013 |
+
"loss": 5.0998,
|
33014 |
+
"step": 5414
|
33015 |
+
},
|
33016 |
+
{
|
33017 |
+
"epoch": 0.04,
|
33018 |
+
"learning_rate": 0.0004,
|
33019 |
+
"loss": 5.0552,
|
33020 |
+
"step": 5415
|
33021 |
+
},
|
33022 |
+
{
|
33023 |
+
"epoch": 0.04,
|
33024 |
+
"learning_rate": 0.0004,
|
33025 |
+
"loss": 3.956,
|
33026 |
+
"step": 5416
|
33027 |
+
},
|
33028 |
+
{
|
33029 |
+
"epoch": 0.04,
|
33030 |
+
"learning_rate": 0.0004,
|
33031 |
+
"loss": 7.4181,
|
33032 |
+
"step": 5417
|
33033 |
+
},
|
33034 |
+
{
|
33035 |
+
"epoch": 0.04,
|
33036 |
+
"learning_rate": 0.0004,
|
33037 |
+
"loss": 4.7352,
|
33038 |
+
"step": 5418
|
33039 |
+
},
|
33040 |
+
{
|
33041 |
+
"epoch": 0.04,
|
33042 |
+
"learning_rate": 0.0004,
|
33043 |
+
"loss": 6.7415,
|
33044 |
+
"step": 5419
|
33045 |
+
},
|
33046 |
+
{
|
33047 |
+
"epoch": 0.04,
|
33048 |
+
"learning_rate": 0.0004,
|
33049 |
+
"loss": 4.289,
|
33050 |
+
"step": 5420
|
33051 |
+
},
|
33052 |
+
{
|
33053 |
+
"epoch": 0.04,
|
33054 |
+
"learning_rate": 0.0004,
|
33055 |
+
"loss": 7.1359,
|
33056 |
+
"step": 5421
|
33057 |
+
},
|
33058 |
+
{
|
33059 |
+
"epoch": 0.04,
|
33060 |
+
"learning_rate": 0.0004,
|
33061 |
+
"loss": 3.1149,
|
33062 |
+
"step": 5422
|
33063 |
+
},
|
33064 |
+
{
|
33065 |
+
"epoch": 0.04,
|
33066 |
+
"learning_rate": 0.0004,
|
33067 |
+
"loss": 8.371,
|
33068 |
+
"step": 5423
|
33069 |
+
},
|
33070 |
+
{
|
33071 |
+
"epoch": 0.04,
|
33072 |
+
"learning_rate": 0.0004,
|
33073 |
+
"loss": 2.7619,
|
33074 |
+
"step": 5424
|
33075 |
+
},
|
33076 |
+
{
|
33077 |
+
"epoch": 0.04,
|
33078 |
+
"learning_rate": 0.0004,
|
33079 |
+
"loss": 5.3229,
|
33080 |
+
"step": 5425
|
33081 |
+
},
|
33082 |
+
{
|
33083 |
+
"epoch": 0.04,
|
33084 |
+
"learning_rate": 0.0004,
|
33085 |
+
"loss": 2.5643,
|
33086 |
+
"step": 5426
|
33087 |
+
},
|
33088 |
+
{
|
33089 |
+
"epoch": 0.04,
|
33090 |
+
"learning_rate": 0.0004,
|
33091 |
+
"loss": 6.8752,
|
33092 |
+
"step": 5427
|
33093 |
+
},
|
33094 |
+
{
|
33095 |
+
"epoch": 0.04,
|
33096 |
+
"learning_rate": 0.0004,
|
33097 |
+
"loss": 3.8785,
|
33098 |
+
"step": 5428
|
33099 |
+
},
|
33100 |
+
{
|
33101 |
+
"epoch": 0.04,
|
33102 |
+
"learning_rate": 0.0004,
|
33103 |
+
"loss": 5.06,
|
33104 |
+
"step": 5429
|
33105 |
+
},
|
33106 |
+
{
|
33107 |
+
"epoch": 0.04,
|
33108 |
+
"learning_rate": 0.0004,
|
33109 |
+
"loss": 6.7646,
|
33110 |
+
"step": 5430
|
33111 |
+
},
|
33112 |
+
{
|
33113 |
+
"epoch": 0.04,
|
33114 |
+
"learning_rate": 0.0004,
|
33115 |
+
"loss": 3.774,
|
33116 |
+
"step": 5431
|
33117 |
+
},
|
33118 |
+
{
|
33119 |
+
"epoch": 0.04,
|
33120 |
+
"learning_rate": 0.0004,
|
33121 |
+
"loss": 2.777,
|
33122 |
+
"step": 5432
|
33123 |
+
},
|
33124 |
+
{
|
33125 |
+
"epoch": 0.04,
|
33126 |
+
"learning_rate": 0.0004,
|
33127 |
+
"loss": 7.2203,
|
33128 |
+
"step": 5433
|
33129 |
+
},
|
33130 |
+
{
|
33131 |
+
"epoch": 0.04,
|
33132 |
+
"learning_rate": 0.0004,
|
33133 |
+
"loss": 3.7283,
|
33134 |
+
"step": 5434
|
33135 |
+
},
|
33136 |
+
{
|
33137 |
+
"epoch": 0.04,
|
33138 |
+
"learning_rate": 0.0004,
|
33139 |
+
"loss": 7.4205,
|
33140 |
+
"step": 5435
|
33141 |
+
},
|
33142 |
+
{
|
33143 |
+
"epoch": 0.04,
|
33144 |
+
"learning_rate": 0.0004,
|
33145 |
+
"loss": 3.7002,
|
33146 |
+
"step": 5436
|
33147 |
+
},
|
33148 |
+
{
|
33149 |
+
"epoch": 0.04,
|
33150 |
+
"learning_rate": 0.0004,
|
33151 |
+
"loss": 3.32,
|
33152 |
+
"step": 5437
|
33153 |
+
},
|
33154 |
+
{
|
33155 |
+
"epoch": 0.04,
|
33156 |
+
"learning_rate": 0.0004,
|
33157 |
+
"loss": 7.5895,
|
33158 |
+
"step": 5438
|
33159 |
+
},
|
33160 |
+
{
|
33161 |
+
"epoch": 0.04,
|
33162 |
+
"learning_rate": 0.0004,
|
33163 |
+
"loss": 2.5335,
|
33164 |
+
"step": 5439
|
33165 |
+
},
|
33166 |
+
{
|
33167 |
+
"epoch": 0.04,
|
33168 |
+
"learning_rate": 0.0004,
|
33169 |
+
"loss": 2.5173,
|
33170 |
+
"step": 5440
|
33171 |
+
},
|
33172 |
+
{
|
33173 |
+
"epoch": 0.04,
|
33174 |
+
"learning_rate": 0.0004,
|
33175 |
+
"loss": 7.3841,
|
33176 |
+
"step": 5441
|
33177 |
+
},
|
33178 |
+
{
|
33179 |
+
"epoch": 0.04,
|
33180 |
+
"learning_rate": 0.0004,
|
33181 |
+
"loss": 6.1075,
|
33182 |
+
"step": 5442
|
33183 |
+
},
|
33184 |
+
{
|
33185 |
+
"epoch": 0.04,
|
33186 |
+
"learning_rate": 0.0004,
|
33187 |
+
"loss": 8.4645,
|
33188 |
+
"step": 5443
|
33189 |
+
},
|
33190 |
+
{
|
33191 |
+
"epoch": 0.04,
|
33192 |
+
"learning_rate": 0.0004,
|
33193 |
+
"loss": 2.5685,
|
33194 |
+
"step": 5444
|
33195 |
+
},
|
33196 |
+
{
|
33197 |
+
"epoch": 0.04,
|
33198 |
+
"learning_rate": 0.0004,
|
33199 |
+
"loss": 3.2423,
|
33200 |
+
"step": 5445
|
33201 |
+
},
|
33202 |
+
{
|
33203 |
+
"epoch": 0.04,
|
33204 |
+
"learning_rate": 0.0004,
|
33205 |
+
"loss": 8.3062,
|
33206 |
+
"step": 5446
|
33207 |
+
},
|
33208 |
+
{
|
33209 |
+
"epoch": 0.04,
|
33210 |
+
"learning_rate": 0.0004,
|
33211 |
+
"loss": 4.195,
|
33212 |
+
"step": 5447
|
33213 |
+
},
|
33214 |
+
{
|
33215 |
+
"epoch": 0.04,
|
33216 |
+
"learning_rate": 0.0004,
|
33217 |
+
"loss": 4.1215,
|
33218 |
+
"step": 5448
|
33219 |
+
},
|
33220 |
+
{
|
33221 |
+
"epoch": 0.04,
|
33222 |
+
"learning_rate": 0.0004,
|
33223 |
+
"loss": 3.7096,
|
33224 |
+
"step": 5449
|
33225 |
+
},
|
33226 |
+
{
|
33227 |
+
"epoch": 0.04,
|
33228 |
+
"learning_rate": 0.0004,
|
33229 |
+
"loss": 4.8696,
|
33230 |
+
"step": 5450
|
33231 |
+
},
|
33232 |
+
{
|
33233 |
+
"epoch": 0.04,
|
33234 |
+
"learning_rate": 0.0004,
|
33235 |
+
"loss": 3.0856,
|
33236 |
+
"step": 5451
|
33237 |
+
},
|
33238 |
+
{
|
33239 |
+
"epoch": 0.04,
|
33240 |
+
"learning_rate": 0.0004,
|
33241 |
+
"loss": 8.131,
|
33242 |
+
"step": 5452
|
33243 |
+
},
|
33244 |
+
{
|
33245 |
+
"epoch": 0.04,
|
33246 |
+
"learning_rate": 0.0004,
|
33247 |
+
"loss": 6.9663,
|
33248 |
+
"step": 5453
|
33249 |
+
},
|
33250 |
+
{
|
33251 |
+
"epoch": 0.04,
|
33252 |
+
"learning_rate": 0.0004,
|
33253 |
+
"loss": 6.8655,
|
33254 |
+
"step": 5454
|
33255 |
+
},
|
33256 |
+
{
|
33257 |
+
"epoch": 0.04,
|
33258 |
+
"learning_rate": 0.0004,
|
33259 |
+
"loss": 7.4411,
|
33260 |
+
"step": 5455
|
33261 |
+
},
|
33262 |
+
{
|
33263 |
+
"epoch": 0.04,
|
33264 |
+
"learning_rate": 0.0004,
|
33265 |
+
"loss": 7.33,
|
33266 |
+
"step": 5456
|
33267 |
+
},
|
33268 |
+
{
|
33269 |
+
"epoch": 0.04,
|
33270 |
+
"learning_rate": 0.0004,
|
33271 |
+
"loss": 7.4933,
|
33272 |
+
"step": 5457
|
33273 |
+
},
|
33274 |
+
{
|
33275 |
+
"epoch": 0.04,
|
33276 |
+
"learning_rate": 0.0004,
|
33277 |
+
"loss": 3.6501,
|
33278 |
+
"step": 5458
|
33279 |
+
},
|
33280 |
+
{
|
33281 |
+
"epoch": 0.04,
|
33282 |
+
"learning_rate": 0.0004,
|
33283 |
+
"loss": 6.9743,
|
33284 |
+
"step": 5459
|
33285 |
+
},
|
33286 |
+
{
|
33287 |
+
"epoch": 0.04,
|
33288 |
+
"learning_rate": 0.0004,
|
33289 |
+
"loss": 9.029,
|
33290 |
+
"step": 5460
|
33291 |
+
},
|
33292 |
+
{
|
33293 |
+
"epoch": 0.04,
|
33294 |
+
"learning_rate": 0.0004,
|
33295 |
+
"loss": 6.8042,
|
33296 |
+
"step": 5461
|
33297 |
+
},
|
33298 |
+
{
|
33299 |
+
"epoch": 0.04,
|
33300 |
+
"learning_rate": 0.0004,
|
33301 |
+
"loss": 10.3662,
|
33302 |
+
"step": 5462
|
33303 |
+
},
|
33304 |
+
{
|
33305 |
+
"epoch": 0.04,
|
33306 |
+
"learning_rate": 0.0004,
|
33307 |
+
"loss": 6.5808,
|
33308 |
+
"step": 5463
|
33309 |
+
},
|
33310 |
+
{
|
33311 |
+
"epoch": 0.04,
|
33312 |
+
"learning_rate": 0.0004,
|
33313 |
+
"loss": 7.422,
|
33314 |
+
"step": 5464
|
33315 |
+
},
|
33316 |
+
{
|
33317 |
+
"epoch": 0.04,
|
33318 |
+
"learning_rate": 0.0004,
|
33319 |
+
"loss": 7.5076,
|
33320 |
+
"step": 5465
|
33321 |
+
},
|
33322 |
+
{
|
33323 |
+
"epoch": 0.04,
|
33324 |
+
"learning_rate": 0.0004,
|
33325 |
+
"loss": 6.0352,
|
33326 |
+
"step": 5466
|
33327 |
+
},
|
33328 |
+
{
|
33329 |
+
"epoch": 0.04,
|
33330 |
+
"learning_rate": 0.0004,
|
33331 |
+
"loss": 2.7639,
|
33332 |
+
"step": 5467
|
33333 |
+
},
|
33334 |
+
{
|
33335 |
+
"epoch": 0.04,
|
33336 |
+
"learning_rate": 0.0004,
|
33337 |
+
"loss": 4.9244,
|
33338 |
+
"step": 5468
|
33339 |
+
},
|
33340 |
+
{
|
33341 |
+
"epoch": 0.04,
|
33342 |
+
"learning_rate": 0.0004,
|
33343 |
+
"loss": 6.7408,
|
33344 |
+
"step": 5469
|
33345 |
+
},
|
33346 |
+
{
|
33347 |
+
"epoch": 0.04,
|
33348 |
+
"learning_rate": 0.0004,
|
33349 |
+
"loss": 4.7444,
|
33350 |
+
"step": 5470
|
33351 |
+
},
|
33352 |
+
{
|
33353 |
+
"epoch": 0.04,
|
33354 |
+
"learning_rate": 0.0004,
|
33355 |
+
"loss": 8.3459,
|
33356 |
+
"step": 5471
|
33357 |
+
},
|
33358 |
+
{
|
33359 |
+
"epoch": 0.04,
|
33360 |
+
"learning_rate": 0.0004,
|
33361 |
+
"loss": 6.9678,
|
33362 |
+
"step": 5472
|
33363 |
+
},
|
33364 |
+
{
|
33365 |
+
"epoch": 0.04,
|
33366 |
+
"learning_rate": 0.0004,
|
33367 |
+
"loss": 8.1263,
|
33368 |
+
"step": 5473
|
33369 |
+
},
|
33370 |
+
{
|
33371 |
+
"epoch": 0.04,
|
33372 |
+
"learning_rate": 0.0004,
|
33373 |
+
"loss": 6.1176,
|
33374 |
+
"step": 5474
|
33375 |
+
},
|
33376 |
+
{
|
33377 |
+
"epoch": 0.04,
|
33378 |
+
"learning_rate": 0.0004,
|
33379 |
+
"loss": 5.2127,
|
33380 |
+
"step": 5475
|
33381 |
+
},
|
33382 |
+
{
|
33383 |
+
"epoch": 0.04,
|
33384 |
+
"learning_rate": 0.0004,
|
33385 |
+
"loss": 3.1435,
|
33386 |
+
"step": 5476
|
33387 |
+
},
|
33388 |
+
{
|
33389 |
+
"epoch": 0.04,
|
33390 |
+
"learning_rate": 0.0004,
|
33391 |
+
"loss": 5.8836,
|
33392 |
+
"step": 5477
|
33393 |
+
},
|
33394 |
+
{
|
33395 |
+
"epoch": 0.04,
|
33396 |
+
"learning_rate": 0.0004,
|
33397 |
+
"loss": 2.7154,
|
33398 |
+
"step": 5478
|
33399 |
+
},
|
33400 |
+
{
|
33401 |
+
"epoch": 0.04,
|
33402 |
+
"learning_rate": 0.0004,
|
33403 |
+
"loss": 7.6181,
|
33404 |
+
"step": 5479
|
33405 |
+
},
|
33406 |
+
{
|
33407 |
+
"epoch": 0.04,
|
33408 |
+
"learning_rate": 0.0004,
|
33409 |
+
"loss": 3.5132,
|
33410 |
+
"step": 5480
|
33411 |
+
},
|
33412 |
+
{
|
33413 |
+
"epoch": 0.04,
|
33414 |
+
"learning_rate": 0.0004,
|
33415 |
+
"loss": 5.9472,
|
33416 |
+
"step": 5481
|
33417 |
+
},
|
33418 |
+
{
|
33419 |
+
"epoch": 0.04,
|
33420 |
+
"learning_rate": 0.0004,
|
33421 |
+
"loss": 2.7316,
|
33422 |
+
"step": 5482
|
33423 |
+
},
|
33424 |
+
{
|
33425 |
+
"epoch": 0.04,
|
33426 |
+
"learning_rate": 0.0004,
|
33427 |
+
"loss": 4.4713,
|
33428 |
+
"step": 5483
|
33429 |
+
},
|
33430 |
+
{
|
33431 |
+
"epoch": 0.04,
|
33432 |
+
"learning_rate": 0.0004,
|
33433 |
+
"loss": 8.4081,
|
33434 |
+
"step": 5484
|
33435 |
+
},
|
33436 |
+
{
|
33437 |
+
"epoch": 0.04,
|
33438 |
+
"learning_rate": 0.0004,
|
33439 |
+
"loss": 2.5906,
|
33440 |
+
"step": 5485
|
33441 |
+
},
|
33442 |
+
{
|
33443 |
+
"epoch": 0.04,
|
33444 |
+
"learning_rate": 0.0004,
|
33445 |
+
"loss": 7.8309,
|
33446 |
+
"step": 5486
|
33447 |
+
},
|
33448 |
+
{
|
33449 |
+
"epoch": 0.04,
|
33450 |
+
"learning_rate": 0.0004,
|
33451 |
+
"loss": 2.5541,
|
33452 |
+
"step": 5487
|
33453 |
+
},
|
33454 |
+
{
|
33455 |
+
"epoch": 0.04,
|
33456 |
+
"learning_rate": 0.0004,
|
33457 |
+
"loss": 2.6686,
|
33458 |
+
"step": 5488
|
33459 |
+
},
|
33460 |
+
{
|
33461 |
+
"epoch": 0.04,
|
33462 |
+
"learning_rate": 0.0004,
|
33463 |
+
"loss": 2.5044,
|
33464 |
+
"step": 5489
|
33465 |
+
},
|
33466 |
+
{
|
33467 |
+
"epoch": 0.04,
|
33468 |
+
"learning_rate": 0.0004,
|
33469 |
+
"loss": 6.8598,
|
33470 |
+
"step": 5490
|
33471 |
+
},
|
33472 |
+
{
|
33473 |
+
"epoch": 0.04,
|
33474 |
+
"learning_rate": 0.0004,
|
33475 |
+
"loss": 8.1069,
|
33476 |
+
"step": 5491
|
33477 |
+
},
|
33478 |
+
{
|
33479 |
+
"epoch": 0.04,
|
33480 |
+
"learning_rate": 0.0004,
|
33481 |
+
"loss": 9.3975,
|
33482 |
+
"step": 5492
|
33483 |
+
},
|
33484 |
+
{
|
33485 |
+
"epoch": 0.04,
|
33486 |
+
"learning_rate": 0.0004,
|
33487 |
+
"loss": 6.7921,
|
33488 |
+
"step": 5493
|
33489 |
+
},
|
33490 |
+
{
|
33491 |
+
"epoch": 0.04,
|
33492 |
+
"learning_rate": 0.0004,
|
33493 |
+
"loss": 5.8833,
|
33494 |
+
"step": 5494
|
33495 |
+
},
|
33496 |
+
{
|
33497 |
+
"epoch": 0.04,
|
33498 |
+
"learning_rate": 0.0004,
|
33499 |
+
"loss": 5.4129,
|
33500 |
+
"step": 5495
|
33501 |
+
},
|
33502 |
+
{
|
33503 |
+
"epoch": 0.04,
|
33504 |
+
"learning_rate": 0.0004,
|
33505 |
+
"loss": 5.6771,
|
33506 |
+
"step": 5496
|
33507 |
+
},
|
33508 |
+
{
|
33509 |
+
"epoch": 0.04,
|
33510 |
+
"learning_rate": 0.0004,
|
33511 |
+
"loss": 6.3949,
|
33512 |
+
"step": 5497
|
33513 |
+
},
|
33514 |
+
{
|
33515 |
+
"epoch": 0.04,
|
33516 |
+
"learning_rate": 0.0004,
|
33517 |
+
"loss": 7.5032,
|
33518 |
+
"step": 5498
|
33519 |
+
},
|
33520 |
+
{
|
33521 |
+
"epoch": 0.04,
|
33522 |
+
"learning_rate": 0.0004,
|
33523 |
+
"loss": 2.963,
|
33524 |
+
"step": 5499
|
33525 |
+
},
|
33526 |
+
{
|
33527 |
+
"epoch": 0.04,
|
33528 |
+
"learning_rate": 0.0004,
|
33529 |
+
"loss": 3.4149,
|
33530 |
+
"step": 5500
|
33531 |
+
},
|
33532 |
+
{
|
33533 |
+
"epoch": 0.04,
|
33534 |
+
"learning_rate": 0.0004,
|
33535 |
+
"loss": 4.0817,
|
33536 |
+
"step": 5501
|
33537 |
+
},
|
33538 |
+
{
|
33539 |
+
"epoch": 0.04,
|
33540 |
+
"learning_rate": 0.0004,
|
33541 |
+
"loss": 8.606,
|
33542 |
+
"step": 5502
|
33543 |
+
},
|
33544 |
+
{
|
33545 |
+
"epoch": 0.04,
|
33546 |
+
"learning_rate": 0.0004,
|
33547 |
+
"loss": 8.112,
|
33548 |
+
"step": 5503
|
33549 |
+
},
|
33550 |
+
{
|
33551 |
+
"epoch": 0.04,
|
33552 |
+
"learning_rate": 0.0004,
|
33553 |
+
"loss": 9.0323,
|
33554 |
+
"step": 5504
|
33555 |
+
},
|
33556 |
+
{
|
33557 |
+
"epoch": 0.04,
|
33558 |
+
"learning_rate": 0.0004,
|
33559 |
+
"loss": 5.0102,
|
33560 |
+
"step": 5505
|
33561 |
+
},
|
33562 |
+
{
|
33563 |
+
"epoch": 0.04,
|
33564 |
+
"learning_rate": 0.0004,
|
33565 |
+
"loss": 6.9195,
|
33566 |
+
"step": 5506
|
33567 |
+
},
|
33568 |
+
{
|
33569 |
+
"epoch": 0.04,
|
33570 |
+
"learning_rate": 0.0004,
|
33571 |
+
"loss": 8.0544,
|
33572 |
+
"step": 5507
|
33573 |
+
},
|
33574 |
+
{
|
33575 |
+
"epoch": 0.04,
|
33576 |
+
"learning_rate": 0.0004,
|
33577 |
+
"loss": 5.7615,
|
33578 |
+
"step": 5508
|
33579 |
+
},
|
33580 |
+
{
|
33581 |
+
"epoch": 0.04,
|
33582 |
+
"learning_rate": 0.0004,
|
33583 |
+
"loss": 6.9108,
|
33584 |
+
"step": 5509
|
33585 |
+
},
|
33586 |
+
{
|
33587 |
+
"epoch": 0.04,
|
33588 |
+
"learning_rate": 0.0004,
|
33589 |
+
"loss": 7.2846,
|
33590 |
+
"step": 5510
|
33591 |
+
},
|
33592 |
+
{
|
33593 |
+
"epoch": 0.04,
|
33594 |
+
"learning_rate": 0.0004,
|
33595 |
+
"loss": 8.1243,
|
33596 |
+
"step": 5511
|
33597 |
+
},
|
33598 |
+
{
|
33599 |
+
"epoch": 0.04,
|
33600 |
+
"learning_rate": 0.0004,
|
33601 |
+
"loss": 7.0416,
|
33602 |
+
"step": 5512
|
33603 |
+
},
|
33604 |
+
{
|
33605 |
+
"epoch": 0.04,
|
33606 |
+
"learning_rate": 0.0004,
|
33607 |
+
"loss": 5.9334,
|
33608 |
+
"step": 5513
|
33609 |
+
},
|
33610 |
+
{
|
33611 |
+
"epoch": 0.04,
|
33612 |
+
"learning_rate": 0.0004,
|
33613 |
+
"loss": 6.7127,
|
33614 |
+
"step": 5514
|
33615 |
+
},
|
33616 |
+
{
|
33617 |
+
"epoch": 0.04,
|
33618 |
+
"learning_rate": 0.0004,
|
33619 |
+
"loss": 3.2506,
|
33620 |
+
"step": 5515
|
33621 |
+
},
|
33622 |
+
{
|
33623 |
+
"epoch": 0.04,
|
33624 |
+
"learning_rate": 0.0004,
|
33625 |
+
"loss": 9.5912,
|
33626 |
+
"step": 5516
|
33627 |
+
},
|
33628 |
+
{
|
33629 |
+
"epoch": 0.04,
|
33630 |
+
"learning_rate": 0.0004,
|
33631 |
+
"loss": 3.1955,
|
33632 |
+
"step": 5517
|
33633 |
+
},
|
33634 |
+
{
|
33635 |
+
"epoch": 0.04,
|
33636 |
+
"learning_rate": 0.0004,
|
33637 |
+
"loss": 5.3704,
|
33638 |
+
"step": 5518
|
33639 |
+
},
|
33640 |
+
{
|
33641 |
+
"epoch": 0.04,
|
33642 |
+
"learning_rate": 0.0004,
|
33643 |
+
"loss": 5.775,
|
33644 |
+
"step": 5519
|
33645 |
+
},
|
33646 |
+
{
|
33647 |
+
"epoch": 0.04,
|
33648 |
+
"learning_rate": 0.0004,
|
33649 |
+
"loss": 5.4993,
|
33650 |
+
"step": 5520
|
33651 |
+
},
|
33652 |
+
{
|
33653 |
+
"epoch": 0.04,
|
33654 |
+
"learning_rate": 0.0004,
|
33655 |
+
"loss": 8.1517,
|
33656 |
+
"step": 5521
|
33657 |
+
},
|
33658 |
+
{
|
33659 |
+
"epoch": 0.04,
|
33660 |
+
"learning_rate": 0.0004,
|
33661 |
+
"loss": 6.7803,
|
33662 |
+
"step": 5522
|
33663 |
+
},
|
33664 |
+
{
|
33665 |
+
"epoch": 0.04,
|
33666 |
+
"learning_rate": 0.0004,
|
33667 |
+
"loss": 5.2405,
|
33668 |
+
"step": 5523
|
33669 |
+
},
|
33670 |
+
{
|
33671 |
+
"epoch": 0.04,
|
33672 |
+
"learning_rate": 0.0004,
|
33673 |
+
"loss": 3.6089,
|
33674 |
+
"step": 5524
|
33675 |
+
},
|
33676 |
+
{
|
33677 |
+
"epoch": 0.04,
|
33678 |
+
"learning_rate": 0.0004,
|
33679 |
+
"loss": 6.3463,
|
33680 |
+
"step": 5525
|
33681 |
+
},
|
33682 |
+
{
|
33683 |
+
"epoch": 0.04,
|
33684 |
+
"learning_rate": 0.0004,
|
33685 |
+
"loss": 8.8214,
|
33686 |
+
"step": 5526
|
33687 |
+
},
|
33688 |
+
{
|
33689 |
+
"epoch": 0.04,
|
33690 |
+
"learning_rate": 0.0004,
|
33691 |
+
"loss": 7.0789,
|
33692 |
+
"step": 5527
|
33693 |
+
},
|
33694 |
+
{
|
33695 |
+
"epoch": 0.04,
|
33696 |
+
"learning_rate": 0.0004,
|
33697 |
+
"loss": 4.0443,
|
33698 |
+
"step": 5528
|
33699 |
+
},
|
33700 |
+
{
|
33701 |
+
"epoch": 0.04,
|
33702 |
+
"learning_rate": 0.0004,
|
33703 |
+
"loss": 2.9387,
|
33704 |
+
"step": 5529
|
33705 |
+
},
|
33706 |
+
{
|
33707 |
+
"epoch": 0.04,
|
33708 |
+
"learning_rate": 0.0004,
|
33709 |
+
"loss": 3.3787,
|
33710 |
+
"step": 5530
|
33711 |
+
},
|
33712 |
+
{
|
33713 |
+
"epoch": 0.04,
|
33714 |
+
"learning_rate": 0.0004,
|
33715 |
+
"loss": 3.2718,
|
33716 |
+
"step": 5531
|
33717 |
+
},
|
33718 |
+
{
|
33719 |
+
"epoch": 0.04,
|
33720 |
+
"learning_rate": 0.0004,
|
33721 |
+
"loss": 7.1476,
|
33722 |
+
"step": 5532
|
33723 |
+
},
|
33724 |
+
{
|
33725 |
+
"epoch": 0.04,
|
33726 |
+
"learning_rate": 0.0004,
|
33727 |
+
"loss": 3.1862,
|
33728 |
+
"step": 5533
|
33729 |
+
},
|
33730 |
+
{
|
33731 |
+
"epoch": 0.04,
|
33732 |
+
"learning_rate": 0.0004,
|
33733 |
+
"loss": 7.9094,
|
33734 |
+
"step": 5534
|
33735 |
+
},
|
33736 |
+
{
|
33737 |
+
"epoch": 0.04,
|
33738 |
+
"learning_rate": 0.0004,
|
33739 |
+
"loss": 2.6915,
|
33740 |
+
"step": 5535
|
33741 |
+
},
|
33742 |
+
{
|
33743 |
+
"epoch": 0.04,
|
33744 |
+
"learning_rate": 0.0004,
|
33745 |
+
"loss": 6.4335,
|
33746 |
+
"step": 5536
|
33747 |
+
},
|
33748 |
+
{
|
33749 |
+
"epoch": 0.04,
|
33750 |
+
"learning_rate": 0.0004,
|
33751 |
+
"loss": 3.5026,
|
33752 |
+
"step": 5537
|
33753 |
+
},
|
33754 |
+
{
|
33755 |
+
"epoch": 0.04,
|
33756 |
+
"learning_rate": 0.0004,
|
33757 |
+
"loss": 3.4137,
|
33758 |
+
"step": 5538
|
33759 |
+
},
|
33760 |
+
{
|
33761 |
+
"epoch": 0.04,
|
33762 |
+
"learning_rate": 0.0004,
|
33763 |
+
"loss": 2.4747,
|
33764 |
+
"step": 5539
|
33765 |
+
},
|
33766 |
+
{
|
33767 |
+
"epoch": 0.04,
|
33768 |
+
"learning_rate": 0.0004,
|
33769 |
+
"loss": 2.8525,
|
33770 |
+
"step": 5540
|
33771 |
+
},
|
33772 |
+
{
|
33773 |
+
"epoch": 0.04,
|
33774 |
+
"learning_rate": 0.0004,
|
33775 |
+
"loss": 7.0023,
|
33776 |
+
"step": 5541
|
33777 |
+
},
|
33778 |
+
{
|
33779 |
+
"epoch": 0.04,
|
33780 |
+
"learning_rate": 0.0004,
|
33781 |
+
"loss": 6.7906,
|
33782 |
+
"step": 5542
|
33783 |
+
},
|
33784 |
+
{
|
33785 |
+
"epoch": 0.04,
|
33786 |
+
"learning_rate": 0.0004,
|
33787 |
+
"loss": 6.4716,
|
33788 |
+
"step": 5543
|
33789 |
+
},
|
33790 |
+
{
|
33791 |
+
"epoch": 0.04,
|
33792 |
+
"learning_rate": 0.0004,
|
33793 |
+
"loss": 2.437,
|
33794 |
+
"step": 5544
|
33795 |
+
},
|
33796 |
+
{
|
33797 |
+
"epoch": 0.04,
|
33798 |
+
"learning_rate": 0.0004,
|
33799 |
+
"loss": 2.1311,
|
33800 |
+
"step": 5545
|
33801 |
+
},
|
33802 |
+
{
|
33803 |
+
"epoch": 0.04,
|
33804 |
+
"learning_rate": 0.0004,
|
33805 |
+
"loss": 5.2937,
|
33806 |
+
"step": 5546
|
33807 |
+
},
|
33808 |
+
{
|
33809 |
+
"epoch": 0.04,
|
33810 |
+
"learning_rate": 0.0004,
|
33811 |
+
"loss": 8.6679,
|
33812 |
+
"step": 5547
|
33813 |
+
},
|
33814 |
+
{
|
33815 |
+
"epoch": 0.04,
|
33816 |
+
"learning_rate": 0.0004,
|
33817 |
+
"loss": 6.9048,
|
33818 |
+
"step": 5548
|
33819 |
+
},
|
33820 |
+
{
|
33821 |
+
"epoch": 0.04,
|
33822 |
+
"learning_rate": 0.0004,
|
33823 |
+
"loss": 3.9706,
|
33824 |
+
"step": 5549
|
33825 |
+
},
|
33826 |
+
{
|
33827 |
+
"epoch": 0.04,
|
33828 |
+
"learning_rate": 0.0004,
|
33829 |
+
"loss": 5.5848,
|
33830 |
+
"step": 5550
|
33831 |
+
},
|
33832 |
+
{
|
33833 |
+
"epoch": 0.04,
|
33834 |
+
"learning_rate": 0.0004,
|
33835 |
+
"loss": 9.3629,
|
33836 |
+
"step": 5551
|
33837 |
+
},
|
33838 |
+
{
|
33839 |
+
"epoch": 0.04,
|
33840 |
+
"learning_rate": 0.0004,
|
33841 |
+
"loss": 8.8409,
|
33842 |
+
"step": 5552
|
33843 |
+
},
|
33844 |
+
{
|
33845 |
+
"epoch": 0.04,
|
33846 |
+
"learning_rate": 0.0004,
|
33847 |
+
"loss": 7.8026,
|
33848 |
+
"step": 5553
|
33849 |
+
},
|
33850 |
+
{
|
33851 |
+
"epoch": 0.04,
|
33852 |
+
"learning_rate": 0.0004,
|
33853 |
+
"loss": 6.7644,
|
33854 |
+
"step": 5554
|
33855 |
+
},
|
33856 |
+
{
|
33857 |
+
"epoch": 0.04,
|
33858 |
+
"learning_rate": 0.0004,
|
33859 |
+
"loss": 7.3403,
|
33860 |
+
"step": 5555
|
33861 |
+
},
|
33862 |
+
{
|
33863 |
+
"epoch": 0.04,
|
33864 |
+
"learning_rate": 0.0004,
|
33865 |
+
"loss": 8.8025,
|
33866 |
+
"step": 5556
|
33867 |
+
},
|
33868 |
+
{
|
33869 |
+
"epoch": 0.04,
|
33870 |
+
"learning_rate": 0.0004,
|
33871 |
+
"loss": 7.1454,
|
33872 |
+
"step": 5557
|
33873 |
+
},
|
33874 |
+
{
|
33875 |
+
"epoch": 0.04,
|
33876 |
+
"learning_rate": 0.0004,
|
33877 |
+
"loss": 7.9874,
|
33878 |
+
"step": 5558
|
33879 |
+
},
|
33880 |
+
{
|
33881 |
+
"epoch": 0.04,
|
33882 |
+
"learning_rate": 0.0004,
|
33883 |
+
"loss": 7.5292,
|
33884 |
+
"step": 5559
|
33885 |
+
},
|
33886 |
+
{
|
33887 |
+
"epoch": 0.04,
|
33888 |
+
"learning_rate": 0.0004,
|
33889 |
+
"loss": 6.8845,
|
33890 |
+
"step": 5560
|
33891 |
+
},
|
33892 |
+
{
|
33893 |
+
"epoch": 0.04,
|
33894 |
+
"learning_rate": 0.0004,
|
33895 |
+
"loss": 5.9432,
|
33896 |
+
"step": 5561
|
33897 |
+
},
|
33898 |
+
{
|
33899 |
+
"epoch": 0.04,
|
33900 |
+
"learning_rate": 0.0004,
|
33901 |
+
"loss": 5.5414,
|
33902 |
+
"step": 5562
|
33903 |
+
},
|
33904 |
+
{
|
33905 |
+
"epoch": 0.04,
|
33906 |
+
"learning_rate": 0.0004,
|
33907 |
+
"loss": 7.7734,
|
33908 |
+
"step": 5563
|
33909 |
+
},
|
33910 |
+
{
|
33911 |
+
"epoch": 0.04,
|
33912 |
+
"learning_rate": 0.0004,
|
33913 |
+
"loss": 4.0763,
|
33914 |
+
"step": 5564
|
33915 |
+
},
|
33916 |
+
{
|
33917 |
+
"epoch": 0.04,
|
33918 |
+
"learning_rate": 0.0004,
|
33919 |
+
"loss": 6.5921,
|
33920 |
+
"step": 5565
|
33921 |
+
},
|
33922 |
+
{
|
33923 |
+
"epoch": 0.04,
|
33924 |
+
"learning_rate": 0.0004,
|
33925 |
+
"loss": 6.3899,
|
33926 |
+
"step": 5566
|
33927 |
+
},
|
33928 |
+
{
|
33929 |
+
"epoch": 0.04,
|
33930 |
+
"learning_rate": 0.0004,
|
33931 |
+
"loss": 8.175,
|
33932 |
+
"step": 5567
|
33933 |
+
},
|
33934 |
+
{
|
33935 |
+
"epoch": 0.04,
|
33936 |
+
"learning_rate": 0.0004,
|
33937 |
+
"loss": 6.3888,
|
33938 |
+
"step": 5568
|
33939 |
+
},
|
33940 |
+
{
|
33941 |
+
"epoch": 0.04,
|
33942 |
+
"learning_rate": 0.0004,
|
33943 |
+
"loss": 6.1035,
|
33944 |
+
"step": 5569
|
33945 |
+
},
|
33946 |
+
{
|
33947 |
+
"epoch": 0.04,
|
33948 |
+
"learning_rate": 0.0004,
|
33949 |
+
"loss": 7.9351,
|
33950 |
+
"step": 5570
|
33951 |
+
},
|
33952 |
+
{
|
33953 |
+
"epoch": 0.04,
|
33954 |
+
"learning_rate": 0.0004,
|
33955 |
+
"loss": 3.6726,
|
33956 |
+
"step": 5571
|
33957 |
+
},
|
33958 |
+
{
|
33959 |
+
"epoch": 0.04,
|
33960 |
+
"learning_rate": 0.0004,
|
33961 |
+
"loss": 10.7635,
|
33962 |
+
"step": 5572
|
33963 |
+
},
|
33964 |
+
{
|
33965 |
+
"epoch": 0.04,
|
33966 |
+
"learning_rate": 0.0004,
|
33967 |
+
"loss": 7.6501,
|
33968 |
+
"step": 5573
|
33969 |
+
},
|
33970 |
+
{
|
33971 |
+
"epoch": 0.04,
|
33972 |
+
"learning_rate": 0.0004,
|
33973 |
+
"loss": 8.5995,
|
33974 |
+
"step": 5574
|
33975 |
+
},
|
33976 |
+
{
|
33977 |
+
"epoch": 0.04,
|
33978 |
+
"learning_rate": 0.0004,
|
33979 |
+
"loss": 7.9299,
|
33980 |
+
"step": 5575
|
33981 |
+
},
|
33982 |
+
{
|
33983 |
+
"epoch": 0.04,
|
33984 |
+
"learning_rate": 0.0004,
|
33985 |
+
"loss": 7.6476,
|
33986 |
+
"step": 5576
|
33987 |
+
},
|
33988 |
+
{
|
33989 |
+
"epoch": 0.04,
|
33990 |
+
"learning_rate": 0.0004,
|
33991 |
+
"loss": 3.6735,
|
33992 |
+
"step": 5577
|
33993 |
+
},
|
33994 |
+
{
|
33995 |
+
"epoch": 0.04,
|
33996 |
+
"learning_rate": 0.0004,
|
33997 |
+
"loss": 3.5287,
|
33998 |
+
"step": 5578
|
33999 |
+
},
|
34000 |
+
{
|
34001 |
+
"epoch": 0.04,
|
34002 |
+
"learning_rate": 0.0004,
|
34003 |
+
"loss": 3.2667,
|
34004 |
+
"step": 5579
|
34005 |
+
},
|
34006 |
+
{
|
34007 |
+
"epoch": 0.04,
|
34008 |
+
"learning_rate": 0.0004,
|
34009 |
+
"loss": 4.7265,
|
34010 |
+
"step": 5580
|
34011 |
+
},
|
34012 |
+
{
|
34013 |
+
"epoch": 0.04,
|
34014 |
+
"learning_rate": 0.0004,
|
34015 |
+
"loss": 4.2115,
|
34016 |
+
"step": 5581
|
34017 |
+
},
|
34018 |
+
{
|
34019 |
+
"epoch": 0.04,
|
34020 |
+
"learning_rate": 0.0004,
|
34021 |
+
"loss": 5.5158,
|
34022 |
+
"step": 5582
|
34023 |
+
},
|
34024 |
+
{
|
34025 |
+
"epoch": 0.04,
|
34026 |
+
"learning_rate": 0.0004,
|
34027 |
+
"loss": 6.8307,
|
34028 |
+
"step": 5583
|
34029 |
+
},
|
34030 |
+
{
|
34031 |
+
"epoch": 0.04,
|
34032 |
+
"learning_rate": 0.0004,
|
34033 |
+
"loss": 4.8515,
|
34034 |
+
"step": 5584
|
34035 |
+
},
|
34036 |
+
{
|
34037 |
+
"epoch": 0.04,
|
34038 |
+
"learning_rate": 0.0004,
|
34039 |
+
"loss": 7.112,
|
34040 |
+
"step": 5585
|
34041 |
+
},
|
34042 |
+
{
|
34043 |
+
"epoch": 0.04,
|
34044 |
+
"learning_rate": 0.0004,
|
34045 |
+
"loss": 6.6433,
|
34046 |
+
"step": 5586
|
34047 |
+
},
|
34048 |
+
{
|
34049 |
+
"epoch": 0.04,
|
34050 |
+
"learning_rate": 0.0004,
|
34051 |
+
"loss": 7.7307,
|
34052 |
+
"step": 5587
|
34053 |
+
},
|
34054 |
+
{
|
34055 |
+
"epoch": 0.04,
|
34056 |
+
"learning_rate": 0.0004,
|
34057 |
+
"loss": 5.9845,
|
34058 |
+
"step": 5588
|
34059 |
+
},
|
34060 |
+
{
|
34061 |
+
"epoch": 0.04,
|
34062 |
+
"learning_rate": 0.0004,
|
34063 |
+
"loss": 7.029,
|
34064 |
+
"step": 5589
|
34065 |
+
},
|
34066 |
+
{
|
34067 |
+
"epoch": 0.04,
|
34068 |
+
"learning_rate": 0.0004,
|
34069 |
+
"loss": 4.3792,
|
34070 |
+
"step": 5590
|
34071 |
+
},
|
34072 |
+
{
|
34073 |
+
"epoch": 0.04,
|
34074 |
+
"learning_rate": 0.0004,
|
34075 |
+
"loss": 5.3602,
|
34076 |
+
"step": 5591
|
34077 |
+
},
|
34078 |
+
{
|
34079 |
+
"epoch": 0.04,
|
34080 |
+
"learning_rate": 0.0004,
|
34081 |
+
"loss": 3.9109,
|
34082 |
+
"step": 5592
|
34083 |
+
},
|
34084 |
+
{
|
34085 |
+
"epoch": 0.04,
|
34086 |
+
"learning_rate": 0.0004,
|
34087 |
+
"loss": 5.5172,
|
34088 |
+
"step": 5593
|
34089 |
+
},
|
34090 |
+
{
|
34091 |
+
"epoch": 0.04,
|
34092 |
+
"learning_rate": 0.0004,
|
34093 |
+
"loss": 3.9592,
|
34094 |
+
"step": 5594
|
34095 |
+
},
|
34096 |
+
{
|
34097 |
+
"epoch": 0.04,
|
34098 |
+
"learning_rate": 0.0004,
|
34099 |
+
"loss": 4.125,
|
34100 |
+
"step": 5595
|
34101 |
+
},
|
34102 |
+
{
|
34103 |
+
"epoch": 0.04,
|
34104 |
+
"learning_rate": 0.0004,
|
34105 |
+
"loss": 5.3487,
|
34106 |
+
"step": 5596
|
34107 |
+
},
|
34108 |
+
{
|
34109 |
+
"epoch": 0.04,
|
34110 |
+
"learning_rate": 0.0004,
|
34111 |
+
"loss": 6.7272,
|
34112 |
+
"step": 5597
|
34113 |
+
},
|
34114 |
+
{
|
34115 |
+
"epoch": 0.04,
|
34116 |
+
"learning_rate": 0.0004,
|
34117 |
+
"loss": 5.6483,
|
34118 |
+
"step": 5598
|
34119 |
+
},
|
34120 |
+
{
|
34121 |
+
"epoch": 0.04,
|
34122 |
+
"learning_rate": 0.0004,
|
34123 |
+
"loss": 2.811,
|
34124 |
+
"step": 5599
|
34125 |
+
},
|
34126 |
+
{
|
34127 |
+
"epoch": 0.04,
|
34128 |
+
"learning_rate": 0.0004,
|
34129 |
+
"loss": 7.2748,
|
34130 |
+
"step": 5600
|
34131 |
+
},
|
34132 |
+
{
|
34133 |
+
"epoch": 0.04,
|
34134 |
+
"eval_loss": 6.424686431884766,
|
34135 |
+
"eval_runtime": 22.4149,
|
34136 |
+
"eval_samples_per_second": 2.231,
|
34137 |
+
"eval_steps_per_second": 1.115,
|
34138 |
+
"step": 5600
|
34139 |
+
},
|
34140 |
+
{
|
34141 |
+
"epoch": 0.04,
|
34142 |
+
"mmlu_eval_accuracy": 0.2525477994227994,
|
34143 |
+
"mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
|
34144 |
+
"mmlu_eval_accuracy_anatomy": 0.07142857142857142,
|
34145 |
+
"mmlu_eval_accuracy_astronomy": 0.3125,
|
34146 |
+
"mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
|
34147 |
+
"mmlu_loss": 3.697023420333862,
|
34148 |
+
"step": 5600
|
34149 |
+
},
|
34150 |
+
{
|
34151 |
+
"epoch": 0.04,
|
34152 |
+
"learning_rate": 0.0004,
|
34153 |
+
"loss": 7.9876,
|
34154 |
+
"step": 5601
|
34155 |
+
},
|
34156 |
+
{
|
34157 |
+
"epoch": 0.04,
|
34158 |
+
"learning_rate": 0.0004,
|
34159 |
+
"loss": 4.0035,
|
34160 |
+
"step": 5602
|
34161 |
+
},
|
34162 |
+
{
|
34163 |
+
"epoch": 0.04,
|
34164 |
+
"learning_rate": 0.0004,
|
34165 |
+
"loss": 9.1667,
|
34166 |
+
"step": 5603
|
34167 |
+
},
|
34168 |
+
{
|
34169 |
+
"epoch": 0.04,
|
34170 |
+
"learning_rate": 0.0004,
|
34171 |
+
"loss": 3.504,
|
34172 |
+
"step": 5604
|
34173 |
+
},
|
34174 |
+
{
|
34175 |
+
"epoch": 0.04,
|
34176 |
+
"learning_rate": 0.0004,
|
34177 |
+
"loss": 8.1787,
|
34178 |
+
"step": 5605
|
34179 |
+
},
|
34180 |
+
{
|
34181 |
+
"epoch": 0.04,
|
34182 |
+
"learning_rate": 0.0004,
|
34183 |
+
"loss": 5.9815,
|
34184 |
+
"step": 5606
|
34185 |
+
},
|
34186 |
+
{
|
34187 |
+
"epoch": 0.04,
|
34188 |
+
"learning_rate": 0.0004,
|
34189 |
+
"loss": 7.3385,
|
34190 |
+
"step": 5607
|
34191 |
+
},
|
34192 |
+
{
|
34193 |
+
"epoch": 0.04,
|
34194 |
+
"learning_rate": 0.0004,
|
34195 |
+
"loss": 8.861,
|
34196 |
+
"step": 5608
|
34197 |
+
},
|
34198 |
+
{
|
34199 |
+
"epoch": 0.04,
|
34200 |
+
"learning_rate": 0.0004,
|
34201 |
+
"loss": 7.4176,
|
34202 |
+
"step": 5609
|
34203 |
+
},
|
34204 |
+
{
|
34205 |
+
"epoch": 0.04,
|
34206 |
+
"learning_rate": 0.0004,
|
34207 |
+
"loss": 8.1611,
|
34208 |
+
"step": 5610
|
34209 |
+
},
|
34210 |
+
{
|
34211 |
+
"epoch": 0.04,
|
34212 |
+
"learning_rate": 0.0004,
|
34213 |
+
"loss": 4.066,
|
34214 |
+
"step": 5611
|
34215 |
+
},
|
34216 |
+
{
|
34217 |
+
"epoch": 0.04,
|
34218 |
+
"learning_rate": 0.0004,
|
34219 |
+
"loss": 3.022,
|
34220 |
+
"step": 5612
|
34221 |
+
},
|
34222 |
+
{
|
34223 |
+
"epoch": 0.04,
|
34224 |
+
"learning_rate": 0.0004,
|
34225 |
+
"loss": 6.3958,
|
34226 |
+
"step": 5613
|
34227 |
+
},
|
34228 |
+
{
|
34229 |
+
"epoch": 0.04,
|
34230 |
+
"learning_rate": 0.0004,
|
34231 |
+
"loss": 7.3849,
|
34232 |
+
"step": 5614
|
34233 |
+
},
|
34234 |
+
{
|
34235 |
+
"epoch": 0.04,
|
34236 |
+
"learning_rate": 0.0004,
|
34237 |
+
"loss": 6.97,
|
34238 |
+
"step": 5615
|
34239 |
+
},
|
34240 |
+
{
|
34241 |
+
"epoch": 0.04,
|
34242 |
+
"learning_rate": 0.0004,
|
34243 |
+
"loss": 6.9633,
|
34244 |
+
"step": 5616
|
34245 |
+
},
|
34246 |
+
{
|
34247 |
+
"epoch": 0.04,
|
34248 |
+
"learning_rate": 0.0004,
|
34249 |
+
"loss": 6.1699,
|
34250 |
+
"step": 5617
|
34251 |
+
},
|
34252 |
+
{
|
34253 |
+
"epoch": 0.04,
|
34254 |
+
"learning_rate": 0.0004,
|
34255 |
+
"loss": 5.2956,
|
34256 |
+
"step": 5618
|
34257 |
+
},
|
34258 |
+
{
|
34259 |
+
"epoch": 0.04,
|
34260 |
+
"learning_rate": 0.0004,
|
34261 |
+
"loss": 6.3328,
|
34262 |
+
"step": 5619
|
34263 |
+
},
|
34264 |
+
{
|
34265 |
+
"epoch": 0.04,
|
34266 |
+
"learning_rate": 0.0004,
|
34267 |
+
"loss": 6.392,
|
34268 |
+
"step": 5620
|
34269 |
+
},
|
34270 |
+
{
|
34271 |
+
"epoch": 0.04,
|
34272 |
+
"learning_rate": 0.0004,
|
34273 |
+
"loss": 3.0549,
|
34274 |
+
"step": 5621
|
34275 |
+
},
|
34276 |
+
{
|
34277 |
+
"epoch": 0.04,
|
34278 |
+
"learning_rate": 0.0004,
|
34279 |
+
"loss": 3.0383,
|
34280 |
+
"step": 5622
|
34281 |
+
},
|
34282 |
+
{
|
34283 |
+
"epoch": 0.04,
|
34284 |
+
"learning_rate": 0.0004,
|
34285 |
+
"loss": 5.7526,
|
34286 |
+
"step": 5623
|
34287 |
+
},
|
34288 |
+
{
|
34289 |
+
"epoch": 0.04,
|
34290 |
+
"learning_rate": 0.0004,
|
34291 |
+
"loss": 8.3642,
|
34292 |
+
"step": 5624
|
34293 |
+
},
|
34294 |
+
{
|
34295 |
+
"epoch": 0.04,
|
34296 |
+
"learning_rate": 0.0004,
|
34297 |
+
"loss": 7.4075,
|
34298 |
+
"step": 5625
|
34299 |
+
},
|
34300 |
+
{
|
34301 |
+
"epoch": 0.04,
|
34302 |
+
"learning_rate": 0.0004,
|
34303 |
+
"loss": 5.4872,
|
34304 |
+
"step": 5626
|
34305 |
+
},
|
34306 |
+
{
|
34307 |
+
"epoch": 0.04,
|
34308 |
+
"learning_rate": 0.0004,
|
34309 |
+
"loss": 5.468,
|
34310 |
+
"step": 5627
|
34311 |
+
},
|
34312 |
+
{
|
34313 |
+
"epoch": 0.04,
|
34314 |
+
"learning_rate": 0.0004,
|
34315 |
+
"loss": 7.0869,
|
34316 |
+
"step": 5628
|
34317 |
+
},
|
34318 |
+
{
|
34319 |
+
"epoch": 0.04,
|
34320 |
+
"learning_rate": 0.0004,
|
34321 |
+
"loss": 5.0191,
|
34322 |
+
"step": 5629
|
34323 |
+
},
|
34324 |
+
{
|
34325 |
+
"epoch": 0.04,
|
34326 |
+
"learning_rate": 0.0004,
|
34327 |
+
"loss": 6.4106,
|
34328 |
+
"step": 5630
|
34329 |
+
},
|
34330 |
+
{
|
34331 |
+
"epoch": 0.04,
|
34332 |
+
"learning_rate": 0.0004,
|
34333 |
+
"loss": 3.9285,
|
34334 |
+
"step": 5631
|
34335 |
+
},
|
34336 |
+
{
|
34337 |
+
"epoch": 0.04,
|
34338 |
+
"learning_rate": 0.0004,
|
34339 |
+
"loss": 6.4914,
|
34340 |
+
"step": 5632
|
34341 |
+
},
|
34342 |
+
{
|
34343 |
+
"epoch": 0.04,
|
34344 |
+
"learning_rate": 0.0004,
|
34345 |
+
"loss": 6.6292,
|
34346 |
+
"step": 5633
|
34347 |
+
},
|
34348 |
+
{
|
34349 |
+
"epoch": 0.04,
|
34350 |
+
"learning_rate": 0.0004,
|
34351 |
+
"loss": 8.1575,
|
34352 |
+
"step": 5634
|
34353 |
+
},
|
34354 |
+
{
|
34355 |
+
"epoch": 0.04,
|
34356 |
+
"learning_rate": 0.0004,
|
34357 |
+
"loss": 5.2383,
|
34358 |
+
"step": 5635
|
34359 |
+
},
|
34360 |
+
{
|
34361 |
+
"epoch": 0.04,
|
34362 |
+
"learning_rate": 0.0004,
|
34363 |
+
"loss": 7.0047,
|
34364 |
+
"step": 5636
|
34365 |
+
},
|
34366 |
+
{
|
34367 |
+
"epoch": 0.04,
|
34368 |
+
"learning_rate": 0.0004,
|
34369 |
+
"loss": 8.2193,
|
34370 |
+
"step": 5637
|
34371 |
+
},
|
34372 |
+
{
|
34373 |
+
"epoch": 0.04,
|
34374 |
+
"learning_rate": 0.0004,
|
34375 |
+
"loss": 2.6003,
|
34376 |
+
"step": 5638
|
34377 |
+
},
|
34378 |
+
{
|
34379 |
+
"epoch": 0.04,
|
34380 |
+
"learning_rate": 0.0004,
|
34381 |
+
"loss": 5.394,
|
34382 |
+
"step": 5639
|
34383 |
+
},
|
34384 |
+
{
|
34385 |
+
"epoch": 0.04,
|
34386 |
+
"learning_rate": 0.0004,
|
34387 |
+
"loss": 2.5271,
|
34388 |
+
"step": 5640
|
34389 |
+
},
|
34390 |
+
{
|
34391 |
+
"epoch": 0.04,
|
34392 |
+
"learning_rate": 0.0004,
|
34393 |
+
"loss": 3.5289,
|
34394 |
+
"step": 5641
|
34395 |
+
},
|
34396 |
+
{
|
34397 |
+
"epoch": 0.04,
|
34398 |
+
"learning_rate": 0.0004,
|
34399 |
+
"loss": 5.5975,
|
34400 |
+
"step": 5642
|
34401 |
+
},
|
34402 |
+
{
|
34403 |
+
"epoch": 0.04,
|
34404 |
+
"learning_rate": 0.0004,
|
34405 |
+
"loss": 4.8294,
|
34406 |
+
"step": 5643
|
34407 |
+
},
|
34408 |
+
{
|
34409 |
+
"epoch": 0.04,
|
34410 |
+
"learning_rate": 0.0004,
|
34411 |
+
"loss": 4.3174,
|
34412 |
+
"step": 5644
|
34413 |
+
},
|
34414 |
+
{
|
34415 |
+
"epoch": 0.04,
|
34416 |
+
"learning_rate": 0.0004,
|
34417 |
+
"loss": 6.1406,
|
34418 |
+
"step": 5645
|
34419 |
+
},
|
34420 |
+
{
|
34421 |
+
"epoch": 0.04,
|
34422 |
+
"learning_rate": 0.0004,
|
34423 |
+
"loss": 3.8035,
|
34424 |
+
"step": 5646
|
34425 |
+
},
|
34426 |
+
{
|
34427 |
+
"epoch": 0.04,
|
34428 |
+
"learning_rate": 0.0004,
|
34429 |
+
"loss": 4.709,
|
34430 |
+
"step": 5647
|
34431 |
+
},
|
34432 |
+
{
|
34433 |
+
"epoch": 0.04,
|
34434 |
+
"learning_rate": 0.0004,
|
34435 |
+
"loss": 6.5739,
|
34436 |
+
"step": 5648
|
34437 |
+
},
|
34438 |
+
{
|
34439 |
+
"epoch": 0.04,
|
34440 |
+
"learning_rate": 0.0004,
|
34441 |
+
"loss": 2.2544,
|
34442 |
+
"step": 5649
|
34443 |
+
},
|
34444 |
+
{
|
34445 |
+
"epoch": 0.04,
|
34446 |
+
"learning_rate": 0.0004,
|
34447 |
+
"loss": 2.8751,
|
34448 |
+
"step": 5650
|
34449 |
+
},
|
34450 |
+
{
|
34451 |
+
"epoch": 0.04,
|
34452 |
+
"learning_rate": 0.0004,
|
34453 |
+
"loss": 8.6977,
|
34454 |
+
"step": 5651
|
34455 |
+
},
|
34456 |
+
{
|
34457 |
+
"epoch": 0.04,
|
34458 |
+
"learning_rate": 0.0004,
|
34459 |
+
"loss": 7.6419,
|
34460 |
+
"step": 5652
|
34461 |
+
},
|
34462 |
+
{
|
34463 |
+
"epoch": 0.04,
|
34464 |
+
"learning_rate": 0.0004,
|
34465 |
+
"loss": 6.7223,
|
34466 |
+
"step": 5653
|
34467 |
+
},
|
34468 |
+
{
|
34469 |
+
"epoch": 0.04,
|
34470 |
+
"learning_rate": 0.0004,
|
34471 |
+
"loss": 7.1536,
|
34472 |
+
"step": 5654
|
34473 |
+
},
|
34474 |
+
{
|
34475 |
+
"epoch": 0.04,
|
34476 |
+
"learning_rate": 0.0004,
|
34477 |
+
"loss": 7.181,
|
34478 |
+
"step": 5655
|
34479 |
+
},
|
34480 |
+
{
|
34481 |
+
"epoch": 0.04,
|
34482 |
+
"learning_rate": 0.0004,
|
34483 |
+
"loss": 6.9262,
|
34484 |
+
"step": 5656
|
34485 |
+
},
|
34486 |
+
{
|
34487 |
+
"epoch": 0.04,
|
34488 |
+
"learning_rate": 0.0004,
|
34489 |
+
"loss": 6.5811,
|
34490 |
+
"step": 5657
|
34491 |
+
},
|
34492 |
+
{
|
34493 |
+
"epoch": 0.04,
|
34494 |
+
"learning_rate": 0.0004,
|
34495 |
+
"loss": 5.4126,
|
34496 |
+
"step": 5658
|
34497 |
+
},
|
34498 |
+
{
|
34499 |
+
"epoch": 0.04,
|
34500 |
+
"learning_rate": 0.0004,
|
34501 |
+
"loss": 3.8075,
|
34502 |
+
"step": 5659
|
34503 |
+
},
|
34504 |
+
{
|
34505 |
+
"epoch": 0.04,
|
34506 |
+
"learning_rate": 0.0004,
|
34507 |
+
"loss": 8.6973,
|
34508 |
+
"step": 5660
|
34509 |
+
},
|
34510 |
+
{
|
34511 |
+
"epoch": 0.04,
|
34512 |
+
"learning_rate": 0.0004,
|
34513 |
+
"loss": 6.8127,
|
34514 |
+
"step": 5661
|
34515 |
+
},
|
34516 |
+
{
|
34517 |
+
"epoch": 0.04,
|
34518 |
+
"learning_rate": 0.0004,
|
34519 |
+
"loss": 6.5205,
|
34520 |
+
"step": 5662
|
34521 |
+
},
|
34522 |
+
{
|
34523 |
+
"epoch": 0.04,
|
34524 |
+
"learning_rate": 0.0004,
|
34525 |
+
"loss": 7.7184,
|
34526 |
+
"step": 5663
|
34527 |
+
},
|
34528 |
+
{
|
34529 |
+
"epoch": 0.04,
|
34530 |
+
"learning_rate": 0.0004,
|
34531 |
+
"loss": 2.6631,
|
34532 |
+
"step": 5664
|
34533 |
+
},
|
34534 |
+
{
|
34535 |
+
"epoch": 0.04,
|
34536 |
+
"learning_rate": 0.0004,
|
34537 |
+
"loss": 8.0133,
|
34538 |
+
"step": 5665
|
34539 |
+
},
|
34540 |
+
{
|
34541 |
+
"epoch": 0.04,
|
34542 |
+
"learning_rate": 0.0004,
|
34543 |
+
"loss": 8.4575,
|
34544 |
+
"step": 5666
|
34545 |
+
},
|
34546 |
+
{
|
34547 |
+
"epoch": 0.04,
|
34548 |
+
"learning_rate": 0.0004,
|
34549 |
+
"loss": 7.2522,
|
34550 |
+
"step": 5667
|
34551 |
+
},
|
34552 |
+
{
|
34553 |
+
"epoch": 0.04,
|
34554 |
+
"learning_rate": 0.0004,
|
34555 |
+
"loss": 7.638,
|
34556 |
+
"step": 5668
|
34557 |
+
},
|
34558 |
+
{
|
34559 |
+
"epoch": 0.04,
|
34560 |
+
"learning_rate": 0.0004,
|
34561 |
+
"loss": 5.5406,
|
34562 |
+
"step": 5669
|
34563 |
+
},
|
34564 |
+
{
|
34565 |
+
"epoch": 0.04,
|
34566 |
+
"learning_rate": 0.0004,
|
34567 |
+
"loss": 6.9613,
|
34568 |
+
"step": 5670
|
34569 |
+
},
|
34570 |
+
{
|
34571 |
+
"epoch": 0.04,
|
34572 |
+
"learning_rate": 0.0004,
|
34573 |
+
"loss": 7.8844,
|
34574 |
+
"step": 5671
|
34575 |
+
},
|
34576 |
+
{
|
34577 |
+
"epoch": 0.04,
|
34578 |
+
"learning_rate": 0.0004,
|
34579 |
+
"loss": 7.2222,
|
34580 |
+
"step": 5672
|
34581 |
+
},
|
34582 |
+
{
|
34583 |
+
"epoch": 0.04,
|
34584 |
+
"learning_rate": 0.0004,
|
34585 |
+
"loss": 5.2324,
|
34586 |
+
"step": 5673
|
34587 |
+
},
|
34588 |
+
{
|
34589 |
+
"epoch": 0.04,
|
34590 |
+
"learning_rate": 0.0004,
|
34591 |
+
"loss": 2.9769,
|
34592 |
+
"step": 5674
|
34593 |
+
},
|
34594 |
+
{
|
34595 |
+
"epoch": 0.04,
|
34596 |
+
"learning_rate": 0.0004,
|
34597 |
+
"loss": 5.4176,
|
34598 |
+
"step": 5675
|
34599 |
+
},
|
34600 |
+
{
|
34601 |
+
"epoch": 0.04,
|
34602 |
+
"learning_rate": 0.0004,
|
34603 |
+
"loss": 3.5889,
|
34604 |
+
"step": 5676
|
34605 |
+
},
|
34606 |
+
{
|
34607 |
+
"epoch": 0.04,
|
34608 |
+
"learning_rate": 0.0004,
|
34609 |
+
"loss": 6.2919,
|
34610 |
+
"step": 5677
|
34611 |
+
},
|
34612 |
+
{
|
34613 |
+
"epoch": 0.04,
|
34614 |
+
"learning_rate": 0.0004,
|
34615 |
+
"loss": 6.9505,
|
34616 |
+
"step": 5678
|
34617 |
+
},
|
34618 |
+
{
|
34619 |
+
"epoch": 0.04,
|
34620 |
+
"learning_rate": 0.0004,
|
34621 |
+
"loss": 5.694,
|
34622 |
+
"step": 5679
|
34623 |
+
},
|
34624 |
+
{
|
34625 |
+
"epoch": 0.04,
|
34626 |
+
"learning_rate": 0.0004,
|
34627 |
+
"loss": 6.5429,
|
34628 |
+
"step": 5680
|
34629 |
+
},
|
34630 |
+
{
|
34631 |
+
"epoch": 0.04,
|
34632 |
+
"learning_rate": 0.0004,
|
34633 |
+
"loss": 5.5205,
|
34634 |
+
"step": 5681
|
34635 |
+
},
|
34636 |
+
{
|
34637 |
+
"epoch": 0.04,
|
34638 |
+
"learning_rate": 0.0004,
|
34639 |
+
"loss": 6.5445,
|
34640 |
+
"step": 5682
|
34641 |
+
},
|
34642 |
+
{
|
34643 |
+
"epoch": 0.04,
|
34644 |
+
"learning_rate": 0.0004,
|
34645 |
+
"loss": 7.3609,
|
34646 |
+
"step": 5683
|
34647 |
+
},
|
34648 |
+
{
|
34649 |
+
"epoch": 0.04,
|
34650 |
+
"learning_rate": 0.0004,
|
34651 |
+
"loss": 7.3904,
|
34652 |
+
"step": 5684
|
34653 |
+
},
|
34654 |
+
{
|
34655 |
+
"epoch": 0.04,
|
34656 |
+
"learning_rate": 0.0004,
|
34657 |
+
"loss": 5.3472,
|
34658 |
+
"step": 5685
|
34659 |
+
},
|
34660 |
+
{
|
34661 |
+
"epoch": 0.04,
|
34662 |
+
"learning_rate": 0.0004,
|
34663 |
+
"loss": 4.7564,
|
34664 |
+
"step": 5686
|
34665 |
+
},
|
34666 |
+
{
|
34667 |
+
"epoch": 0.04,
|
34668 |
+
"learning_rate": 0.0004,
|
34669 |
+
"loss": 6.312,
|
34670 |
+
"step": 5687
|
34671 |
+
},
|
34672 |
+
{
|
34673 |
+
"epoch": 0.04,
|
34674 |
+
"learning_rate": 0.0004,
|
34675 |
+
"loss": 5.4367,
|
34676 |
+
"step": 5688
|
34677 |
+
},
|
34678 |
+
{
|
34679 |
+
"epoch": 0.04,
|
34680 |
+
"learning_rate": 0.0004,
|
34681 |
+
"loss": 8.4472,
|
34682 |
+
"step": 5689
|
34683 |
+
},
|
34684 |
+
{
|
34685 |
+
"epoch": 0.04,
|
34686 |
+
"learning_rate": 0.0004,
|
34687 |
+
"loss": 5.8272,
|
34688 |
+
"step": 5690
|
34689 |
+
},
|
34690 |
+
{
|
34691 |
+
"epoch": 0.04,
|
34692 |
+
"learning_rate": 0.0004,
|
34693 |
+
"loss": 5.2634,
|
34694 |
+
"step": 5691
|
34695 |
+
},
|
34696 |
+
{
|
34697 |
+
"epoch": 0.04,
|
34698 |
+
"learning_rate": 0.0004,
|
34699 |
+
"loss": 3.2939,
|
34700 |
+
"step": 5692
|
34701 |
+
},
|
34702 |
+
{
|
34703 |
+
"epoch": 0.04,
|
34704 |
+
"learning_rate": 0.0004,
|
34705 |
+
"loss": 2.657,
|
34706 |
+
"step": 5693
|
34707 |
+
},
|
34708 |
+
{
|
34709 |
+
"epoch": 0.04,
|
34710 |
+
"learning_rate": 0.0004,
|
34711 |
+
"loss": 3.1746,
|
34712 |
+
"step": 5694
|
34713 |
+
},
|
34714 |
+
{
|
34715 |
+
"epoch": 0.04,
|
34716 |
+
"learning_rate": 0.0004,
|
34717 |
+
"loss": 3.8332,
|
34718 |
+
"step": 5695
|
34719 |
+
},
|
34720 |
+
{
|
34721 |
+
"epoch": 0.04,
|
34722 |
+
"learning_rate": 0.0004,
|
34723 |
+
"loss": 4.94,
|
34724 |
+
"step": 5696
|
34725 |
+
},
|
34726 |
+
{
|
34727 |
+
"epoch": 0.04,
|
34728 |
+
"learning_rate": 0.0004,
|
34729 |
+
"loss": 6.7484,
|
34730 |
+
"step": 5697
|
34731 |
+
},
|
34732 |
+
{
|
34733 |
+
"epoch": 0.04,
|
34734 |
+
"learning_rate": 0.0004,
|
34735 |
+
"loss": 5.5731,
|
34736 |
+
"step": 5698
|
34737 |
+
},
|
34738 |
+
{
|
34739 |
+
"epoch": 0.04,
|
34740 |
+
"learning_rate": 0.0004,
|
34741 |
+
"loss": 2.7432,
|
34742 |
+
"step": 5699
|
34743 |
+
},
|
34744 |
+
{
|
34745 |
+
"epoch": 0.04,
|
34746 |
+
"learning_rate": 0.0004,
|
34747 |
+
"loss": 2.3457,
|
34748 |
+
"step": 5700
|
34749 |
+
},
|
34750 |
+
{
|
34751 |
+
"epoch": 0.04,
|
34752 |
+
"learning_rate": 0.0004,
|
34753 |
+
"loss": 7.3142,
|
34754 |
+
"step": 5701
|
34755 |
+
},
|
34756 |
+
{
|
34757 |
+
"epoch": 0.04,
|
34758 |
+
"learning_rate": 0.0004,
|
34759 |
+
"loss": 8.6531,
|
34760 |
+
"step": 5702
|
34761 |
+
},
|
34762 |
+
{
|
34763 |
+
"epoch": 0.04,
|
34764 |
+
"learning_rate": 0.0004,
|
34765 |
+
"loss": 8.9737,
|
34766 |
+
"step": 5703
|
34767 |
+
},
|
34768 |
+
{
|
34769 |
+
"epoch": 0.04,
|
34770 |
+
"learning_rate": 0.0004,
|
34771 |
+
"loss": 5.6196,
|
34772 |
+
"step": 5704
|
34773 |
+
},
|
34774 |
+
{
|
34775 |
+
"epoch": 0.04,
|
34776 |
+
"learning_rate": 0.0004,
|
34777 |
+
"loss": 4.8655,
|
34778 |
+
"step": 5705
|
34779 |
+
},
|
34780 |
+
{
|
34781 |
+
"epoch": 0.04,
|
34782 |
+
"learning_rate": 0.0004,
|
34783 |
+
"loss": 3.082,
|
34784 |
+
"step": 5706
|
34785 |
+
},
|
34786 |
+
{
|
34787 |
+
"epoch": 0.04,
|
34788 |
+
"learning_rate": 0.0004,
|
34789 |
+
"loss": 3.3827,
|
34790 |
+
"step": 5707
|
34791 |
+
},
|
34792 |
+
{
|
34793 |
+
"epoch": 0.04,
|
34794 |
+
"learning_rate": 0.0004,
|
34795 |
+
"loss": 6.4305,
|
34796 |
+
"step": 5708
|
34797 |
+
},
|
34798 |
+
{
|
34799 |
+
"epoch": 0.04,
|
34800 |
+
"learning_rate": 0.0004,
|
34801 |
+
"loss": 7.6621,
|
34802 |
+
"step": 5709
|
34803 |
+
},
|
34804 |
+
{
|
34805 |
+
"epoch": 0.04,
|
34806 |
+
"learning_rate": 0.0004,
|
34807 |
+
"loss": 7.9571,
|
34808 |
+
"step": 5710
|
34809 |
+
},
|
34810 |
+
{
|
34811 |
+
"epoch": 0.04,
|
34812 |
+
"learning_rate": 0.0004,
|
34813 |
+
"loss": 7.9943,
|
34814 |
+
"step": 5711
|
34815 |
+
},
|
34816 |
+
{
|
34817 |
+
"epoch": 0.04,
|
34818 |
+
"learning_rate": 0.0004,
|
34819 |
+
"loss": 8.6949,
|
34820 |
+
"step": 5712
|
34821 |
+
},
|
34822 |
+
{
|
34823 |
+
"epoch": 0.04,
|
34824 |
+
"learning_rate": 0.0004,
|
34825 |
+
"loss": 7.3717,
|
34826 |
+
"step": 5713
|
34827 |
+
},
|
34828 |
+
{
|
34829 |
+
"epoch": 0.04,
|
34830 |
+
"learning_rate": 0.0004,
|
34831 |
+
"loss": 7.3738,
|
34832 |
+
"step": 5714
|
34833 |
+
},
|
34834 |
+
{
|
34835 |
+
"epoch": 0.04,
|
34836 |
+
"learning_rate": 0.0004,
|
34837 |
+
"loss": 6.5416,
|
34838 |
+
"step": 5715
|
34839 |
+
},
|
34840 |
+
{
|
34841 |
+
"epoch": 0.04,
|
34842 |
+
"learning_rate": 0.0004,
|
34843 |
+
"loss": 3.6103,
|
34844 |
+
"step": 5716
|
34845 |
+
},
|
34846 |
+
{
|
34847 |
+
"epoch": 0.04,
|
34848 |
+
"learning_rate": 0.0004,
|
34849 |
+
"loss": 6.9328,
|
34850 |
+
"step": 5717
|
34851 |
+
},
|
34852 |
+
{
|
34853 |
+
"epoch": 0.04,
|
34854 |
+
"learning_rate": 0.0004,
|
34855 |
+
"loss": 7.5956,
|
34856 |
+
"step": 5718
|
34857 |
+
},
|
34858 |
+
{
|
34859 |
+
"epoch": 0.04,
|
34860 |
+
"learning_rate": 0.0004,
|
34861 |
+
"loss": 6.846,
|
34862 |
+
"step": 5719
|
34863 |
+
},
|
34864 |
+
{
|
34865 |
+
"epoch": 0.04,
|
34866 |
+
"learning_rate": 0.0004,
|
34867 |
+
"loss": 8.6016,
|
34868 |
+
"step": 5720
|
34869 |
+
},
|
34870 |
+
{
|
34871 |
+
"epoch": 0.04,
|
34872 |
+
"learning_rate": 0.0004,
|
34873 |
+
"loss": 7.1171,
|
34874 |
+
"step": 5721
|
34875 |
+
},
|
34876 |
+
{
|
34877 |
+
"epoch": 0.04,
|
34878 |
+
"learning_rate": 0.0004,
|
34879 |
+
"loss": 5.5251,
|
34880 |
+
"step": 5722
|
34881 |
+
},
|
34882 |
+
{
|
34883 |
+
"epoch": 0.04,
|
34884 |
+
"learning_rate": 0.0004,
|
34885 |
+
"loss": 6.3209,
|
34886 |
+
"step": 5723
|
34887 |
+
},
|
34888 |
+
{
|
34889 |
+
"epoch": 0.04,
|
34890 |
+
"learning_rate": 0.0004,
|
34891 |
+
"loss": 3.9372,
|
34892 |
+
"step": 5724
|
34893 |
+
},
|
34894 |
+
{
|
34895 |
+
"epoch": 0.04,
|
34896 |
+
"learning_rate": 0.0004,
|
34897 |
+
"loss": 5.4344,
|
34898 |
+
"step": 5725
|
34899 |
+
},
|
34900 |
+
{
|
34901 |
+
"epoch": 0.04,
|
34902 |
+
"learning_rate": 0.0004,
|
34903 |
+
"loss": 3.4504,
|
34904 |
+
"step": 5726
|
34905 |
+
},
|
34906 |
+
{
|
34907 |
+
"epoch": 0.04,
|
34908 |
+
"learning_rate": 0.0004,
|
34909 |
+
"loss": 3.0255,
|
34910 |
+
"step": 5727
|
34911 |
+
},
|
34912 |
+
{
|
34913 |
+
"epoch": 0.04,
|
34914 |
+
"learning_rate": 0.0004,
|
34915 |
+
"loss": 5.6402,
|
34916 |
+
"step": 5728
|
34917 |
+
},
|
34918 |
+
{
|
34919 |
+
"epoch": 0.04,
|
34920 |
+
"learning_rate": 0.0004,
|
34921 |
+
"loss": 5.542,
|
34922 |
+
"step": 5729
|
34923 |
+
},
|
34924 |
+
{
|
34925 |
+
"epoch": 0.04,
|
34926 |
+
"learning_rate": 0.0004,
|
34927 |
+
"loss": 5.1837,
|
34928 |
+
"step": 5730
|
34929 |
+
},
|
34930 |
+
{
|
34931 |
+
"epoch": 0.04,
|
34932 |
+
"learning_rate": 0.0004,
|
34933 |
+
"loss": 3.796,
|
34934 |
+
"step": 5731
|
34935 |
+
},
|
34936 |
+
{
|
34937 |
+
"epoch": 0.04,
|
34938 |
+
"learning_rate": 0.0004,
|
34939 |
+
"loss": 5.8129,
|
34940 |
+
"step": 5732
|
34941 |
+
},
|
34942 |
+
{
|
34943 |
+
"epoch": 0.04,
|
34944 |
+
"learning_rate": 0.0004,
|
34945 |
+
"loss": 7.0823,
|
34946 |
+
"step": 5733
|
34947 |
+
},
|
34948 |
+
{
|
34949 |
+
"epoch": 0.04,
|
34950 |
+
"learning_rate": 0.0004,
|
34951 |
+
"loss": 6.2968,
|
34952 |
+
"step": 5734
|
34953 |
+
},
|
34954 |
+
{
|
34955 |
+
"epoch": 0.04,
|
34956 |
+
"learning_rate": 0.0004,
|
34957 |
+
"loss": 6.0133,
|
34958 |
+
"step": 5735
|
34959 |
+
},
|
34960 |
+
{
|
34961 |
+
"epoch": 0.04,
|
34962 |
+
"learning_rate": 0.0004,
|
34963 |
+
"loss": 6.5933,
|
34964 |
+
"step": 5736
|
34965 |
+
},
|
34966 |
+
{
|
34967 |
+
"epoch": 0.04,
|
34968 |
+
"learning_rate": 0.0004,
|
34969 |
+
"loss": 6.9654,
|
34970 |
+
"step": 5737
|
34971 |
+
},
|
34972 |
+
{
|
34973 |
+
"epoch": 0.04,
|
34974 |
+
"learning_rate": 0.0004,
|
34975 |
+
"loss": 3.2416,
|
34976 |
+
"step": 5738
|
34977 |
+
},
|
34978 |
+
{
|
34979 |
+
"epoch": 0.04,
|
34980 |
+
"learning_rate": 0.0004,
|
34981 |
+
"loss": 6.8676,
|
34982 |
+
"step": 5739
|
34983 |
+
},
|
34984 |
+
{
|
34985 |
+
"epoch": 0.04,
|
34986 |
+
"learning_rate": 0.0004,
|
34987 |
+
"loss": 8.6902,
|
34988 |
+
"step": 5740
|
34989 |
+
},
|
34990 |
+
{
|
34991 |
+
"epoch": 0.04,
|
34992 |
+
"learning_rate": 0.0004,
|
34993 |
+
"loss": 4.9099,
|
34994 |
+
"step": 5741
|
34995 |
+
},
|
34996 |
+
{
|
34997 |
+
"epoch": 0.04,
|
34998 |
+
"learning_rate": 0.0004,
|
34999 |
+
"loss": 5.0585,
|
35000 |
+
"step": 5742
|
35001 |
+
},
|
35002 |
+
{
|
35003 |
+
"epoch": 0.04,
|
35004 |
+
"learning_rate": 0.0004,
|
35005 |
+
"loss": 7.2383,
|
35006 |
+
"step": 5743
|
35007 |
+
},
|
35008 |
+
{
|
35009 |
+
"epoch": 0.04,
|
35010 |
+
"learning_rate": 0.0004,
|
35011 |
+
"loss": 5.7071,
|
35012 |
+
"step": 5744
|
35013 |
+
},
|
35014 |
+
{
|
35015 |
+
"epoch": 0.04,
|
35016 |
+
"learning_rate": 0.0004,
|
35017 |
+
"loss": 4.0832,
|
35018 |
+
"step": 5745
|
35019 |
+
},
|
35020 |
+
{
|
35021 |
+
"epoch": 0.04,
|
35022 |
+
"learning_rate": 0.0004,
|
35023 |
+
"loss": 6.3828,
|
35024 |
+
"step": 5746
|
35025 |
+
},
|
35026 |
+
{
|
35027 |
+
"epoch": 0.04,
|
35028 |
+
"learning_rate": 0.0004,
|
35029 |
+
"loss": 6.8322,
|
35030 |
+
"step": 5747
|
35031 |
+
},
|
35032 |
+
{
|
35033 |
+
"epoch": 0.04,
|
35034 |
+
"learning_rate": 0.0004,
|
35035 |
+
"loss": 2.7194,
|
35036 |
+
"step": 5748
|
35037 |
+
},
|
35038 |
+
{
|
35039 |
+
"epoch": 0.04,
|
35040 |
+
"learning_rate": 0.0004,
|
35041 |
+
"loss": 3.3352,
|
35042 |
+
"step": 5749
|
35043 |
+
},
|
35044 |
+
{
|
35045 |
+
"epoch": 0.04,
|
35046 |
+
"learning_rate": 0.0004,
|
35047 |
+
"loss": 3.3438,
|
35048 |
+
"step": 5750
|
35049 |
+
},
|
35050 |
+
{
|
35051 |
+
"epoch": 0.04,
|
35052 |
+
"learning_rate": 0.0004,
|
35053 |
+
"loss": 8.5028,
|
35054 |
+
"step": 5751
|
35055 |
+
},
|
35056 |
+
{
|
35057 |
+
"epoch": 0.04,
|
35058 |
+
"learning_rate": 0.0004,
|
35059 |
+
"loss": 9.2395,
|
35060 |
+
"step": 5752
|
35061 |
+
},
|
35062 |
+
{
|
35063 |
+
"epoch": 0.04,
|
35064 |
+
"learning_rate": 0.0004,
|
35065 |
+
"loss": 8.8966,
|
35066 |
+
"step": 5753
|
35067 |
+
},
|
35068 |
+
{
|
35069 |
+
"epoch": 0.04,
|
35070 |
+
"learning_rate": 0.0004,
|
35071 |
+
"loss": 5.8439,
|
35072 |
+
"step": 5754
|
35073 |
+
},
|
35074 |
+
{
|
35075 |
+
"epoch": 0.04,
|
35076 |
+
"learning_rate": 0.0004,
|
35077 |
+
"loss": 8.026,
|
35078 |
+
"step": 5755
|
35079 |
+
},
|
35080 |
+
{
|
35081 |
+
"epoch": 0.04,
|
35082 |
+
"learning_rate": 0.0004,
|
35083 |
+
"loss": 3.7253,
|
35084 |
+
"step": 5756
|
35085 |
+
},
|
35086 |
+
{
|
35087 |
+
"epoch": 0.04,
|
35088 |
+
"learning_rate": 0.0004,
|
35089 |
+
"loss": 2.6592,
|
35090 |
+
"step": 5757
|
35091 |
+
},
|
35092 |
+
{
|
35093 |
+
"epoch": 0.04,
|
35094 |
+
"learning_rate": 0.0004,
|
35095 |
+
"loss": 4.6777,
|
35096 |
+
"step": 5758
|
35097 |
+
},
|
35098 |
+
{
|
35099 |
+
"epoch": 0.04,
|
35100 |
+
"learning_rate": 0.0004,
|
35101 |
+
"loss": 6.7246,
|
35102 |
+
"step": 5759
|
35103 |
+
},
|
35104 |
+
{
|
35105 |
+
"epoch": 0.04,
|
35106 |
+
"learning_rate": 0.0004,
|
35107 |
+
"loss": 6.5969,
|
35108 |
+
"step": 5760
|
35109 |
+
},
|
35110 |
+
{
|
35111 |
+
"epoch": 0.04,
|
35112 |
+
"learning_rate": 0.0004,
|
35113 |
+
"loss": 7.5921,
|
35114 |
+
"step": 5761
|
35115 |
+
},
|
35116 |
+
{
|
35117 |
+
"epoch": 0.04,
|
35118 |
+
"learning_rate": 0.0004,
|
35119 |
+
"loss": 6.2614,
|
35120 |
+
"step": 5762
|
35121 |
+
},
|
35122 |
+
{
|
35123 |
+
"epoch": 0.04,
|
35124 |
+
"learning_rate": 0.0004,
|
35125 |
+
"loss": 8.1911,
|
35126 |
+
"step": 5763
|
35127 |
+
},
|
35128 |
+
{
|
35129 |
+
"epoch": 0.04,
|
35130 |
+
"learning_rate": 0.0004,
|
35131 |
+
"loss": 3.013,
|
35132 |
+
"step": 5764
|
35133 |
+
},
|
35134 |
+
{
|
35135 |
+
"epoch": 0.04,
|
35136 |
+
"learning_rate": 0.0004,
|
35137 |
+
"loss": 5.5307,
|
35138 |
+
"step": 5765
|
35139 |
+
},
|
35140 |
+
{
|
35141 |
+
"epoch": 0.04,
|
35142 |
+
"learning_rate": 0.0004,
|
35143 |
+
"loss": 3.5039,
|
35144 |
+
"step": 5766
|
35145 |
+
},
|
35146 |
+
{
|
35147 |
+
"epoch": 0.04,
|
35148 |
+
"learning_rate": 0.0004,
|
35149 |
+
"loss": 7.7382,
|
35150 |
+
"step": 5767
|
35151 |
+
},
|
35152 |
+
{
|
35153 |
+
"epoch": 0.04,
|
35154 |
+
"learning_rate": 0.0004,
|
35155 |
+
"loss": 6.9728,
|
35156 |
+
"step": 5768
|
35157 |
+
},
|
35158 |
+
{
|
35159 |
+
"epoch": 0.04,
|
35160 |
+
"learning_rate": 0.0004,
|
35161 |
+
"loss": 3.8039,
|
35162 |
+
"step": 5769
|
35163 |
+
},
|
35164 |
+
{
|
35165 |
+
"epoch": 0.04,
|
35166 |
+
"learning_rate": 0.0004,
|
35167 |
+
"loss": 2.5774,
|
35168 |
+
"step": 5770
|
35169 |
+
},
|
35170 |
+
{
|
35171 |
+
"epoch": 0.04,
|
35172 |
+
"learning_rate": 0.0004,
|
35173 |
+
"loss": 6.3394,
|
35174 |
+
"step": 5771
|
35175 |
+
},
|
35176 |
+
{
|
35177 |
+
"epoch": 0.04,
|
35178 |
+
"learning_rate": 0.0004,
|
35179 |
+
"loss": 6.6831,
|
35180 |
+
"step": 5772
|
35181 |
+
},
|
35182 |
+
{
|
35183 |
+
"epoch": 0.04,
|
35184 |
+
"learning_rate": 0.0004,
|
35185 |
+
"loss": 3.1592,
|
35186 |
+
"step": 5773
|
35187 |
+
},
|
35188 |
+
{
|
35189 |
+
"epoch": 0.04,
|
35190 |
+
"learning_rate": 0.0004,
|
35191 |
+
"loss": 3.0903,
|
35192 |
+
"step": 5774
|
35193 |
+
},
|
35194 |
+
{
|
35195 |
+
"epoch": 0.04,
|
35196 |
+
"learning_rate": 0.0004,
|
35197 |
+
"loss": 5.0717,
|
35198 |
+
"step": 5775
|
35199 |
+
},
|
35200 |
+
{
|
35201 |
+
"epoch": 0.04,
|
35202 |
+
"learning_rate": 0.0004,
|
35203 |
+
"loss": 5.9321,
|
35204 |
+
"step": 5776
|
35205 |
+
},
|
35206 |
+
{
|
35207 |
+
"epoch": 0.04,
|
35208 |
+
"learning_rate": 0.0004,
|
35209 |
+
"loss": 6.8956,
|
35210 |
+
"step": 5777
|
35211 |
+
},
|
35212 |
+
{
|
35213 |
+
"epoch": 0.04,
|
35214 |
+
"learning_rate": 0.0004,
|
35215 |
+
"loss": 6.0156,
|
35216 |
+
"step": 5778
|
35217 |
+
},
|
35218 |
+
{
|
35219 |
+
"epoch": 0.04,
|
35220 |
+
"learning_rate": 0.0004,
|
35221 |
+
"loss": 10.4466,
|
35222 |
+
"step": 5779
|
35223 |
+
},
|
35224 |
+
{
|
35225 |
+
"epoch": 0.04,
|
35226 |
+
"learning_rate": 0.0004,
|
35227 |
+
"loss": 6.7845,
|
35228 |
+
"step": 5780
|
35229 |
+
},
|
35230 |
+
{
|
35231 |
+
"epoch": 0.04,
|
35232 |
+
"learning_rate": 0.0004,
|
35233 |
+
"loss": 6.6201,
|
35234 |
+
"step": 5781
|
35235 |
+
},
|
35236 |
+
{
|
35237 |
+
"epoch": 0.04,
|
35238 |
+
"learning_rate": 0.0004,
|
35239 |
+
"loss": 8.0356,
|
35240 |
+
"step": 5782
|
35241 |
+
},
|
35242 |
+
{
|
35243 |
+
"epoch": 0.04,
|
35244 |
+
"learning_rate": 0.0004,
|
35245 |
+
"loss": 3.6344,
|
35246 |
+
"step": 5783
|
35247 |
+
},
|
35248 |
+
{
|
35249 |
+
"epoch": 0.04,
|
35250 |
+
"learning_rate": 0.0004,
|
35251 |
+
"loss": 1.9238,
|
35252 |
+
"step": 5784
|
35253 |
+
},
|
35254 |
+
{
|
35255 |
+
"epoch": 0.04,
|
35256 |
+
"learning_rate": 0.0004,
|
35257 |
+
"loss": 3.1729,
|
35258 |
+
"step": 5785
|
35259 |
+
},
|
35260 |
+
{
|
35261 |
+
"epoch": 0.04,
|
35262 |
+
"learning_rate": 0.0004,
|
35263 |
+
"loss": 3.2512,
|
35264 |
+
"step": 5786
|
35265 |
+
},
|
35266 |
+
{
|
35267 |
+
"epoch": 0.04,
|
35268 |
+
"learning_rate": 0.0004,
|
35269 |
+
"loss": 5.6445,
|
35270 |
+
"step": 5787
|
35271 |
+
},
|
35272 |
+
{
|
35273 |
+
"epoch": 0.04,
|
35274 |
+
"learning_rate": 0.0004,
|
35275 |
+
"loss": 8.5213,
|
35276 |
+
"step": 5788
|
35277 |
+
},
|
35278 |
+
{
|
35279 |
+
"epoch": 0.04,
|
35280 |
+
"learning_rate": 0.0004,
|
35281 |
+
"loss": 7.6667,
|
35282 |
+
"step": 5789
|
35283 |
+
},
|
35284 |
+
{
|
35285 |
+
"epoch": 0.04,
|
35286 |
+
"learning_rate": 0.0004,
|
35287 |
+
"loss": 6.6139,
|
35288 |
+
"step": 5790
|
35289 |
+
},
|
35290 |
+
{
|
35291 |
+
"epoch": 0.04,
|
35292 |
+
"learning_rate": 0.0004,
|
35293 |
+
"loss": 7.3236,
|
35294 |
+
"step": 5791
|
35295 |
+
},
|
35296 |
+
{
|
35297 |
+
"epoch": 0.04,
|
35298 |
+
"learning_rate": 0.0004,
|
35299 |
+
"loss": 5.4503,
|
35300 |
+
"step": 5792
|
35301 |
+
},
|
35302 |
+
{
|
35303 |
+
"epoch": 0.04,
|
35304 |
+
"learning_rate": 0.0004,
|
35305 |
+
"loss": 5.5111,
|
35306 |
+
"step": 5793
|
35307 |
+
},
|
35308 |
+
{
|
35309 |
+
"epoch": 0.04,
|
35310 |
+
"learning_rate": 0.0004,
|
35311 |
+
"loss": 5.6659,
|
35312 |
+
"step": 5794
|
35313 |
+
},
|
35314 |
+
{
|
35315 |
+
"epoch": 0.04,
|
35316 |
+
"learning_rate": 0.0004,
|
35317 |
+
"loss": 6.4502,
|
35318 |
+
"step": 5795
|
35319 |
+
},
|
35320 |
+
{
|
35321 |
+
"epoch": 0.04,
|
35322 |
+
"learning_rate": 0.0004,
|
35323 |
+
"loss": 7.0923,
|
35324 |
+
"step": 5796
|
35325 |
+
},
|
35326 |
+
{
|
35327 |
+
"epoch": 0.04,
|
35328 |
+
"learning_rate": 0.0004,
|
35329 |
+
"loss": 7.4155,
|
35330 |
+
"step": 5797
|
35331 |
+
},
|
35332 |
+
{
|
35333 |
+
"epoch": 0.04,
|
35334 |
+
"learning_rate": 0.0004,
|
35335 |
+
"loss": 6.3765,
|
35336 |
+
"step": 5798
|
35337 |
+
},
|
35338 |
+
{
|
35339 |
+
"epoch": 0.04,
|
35340 |
+
"learning_rate": 0.0004,
|
35341 |
+
"loss": 6.0413,
|
35342 |
+
"step": 5799
|
35343 |
+
},
|
35344 |
+
{
|
35345 |
+
"epoch": 0.04,
|
35346 |
+
"learning_rate": 0.0004,
|
35347 |
+
"loss": 4.103,
|
35348 |
+
"step": 5800
|
35349 |
+
},
|
35350 |
+
{
|
35351 |
+
"epoch": 0.04,
|
35352 |
+
"eval_loss": 6.396474838256836,
|
35353 |
+
"eval_runtime": 22.3993,
|
35354 |
+
"eval_samples_per_second": 2.232,
|
35355 |
+
"eval_steps_per_second": 1.116,
|
35356 |
+
"step": 5800
|
35357 |
+
},
|
35358 |
+
{
|
35359 |
+
"epoch": 0.04,
|
35360 |
+
"mmlu_eval_accuracy": 0.2525477994227994,
|
35361 |
+
"mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
|
35362 |
+
"mmlu_eval_accuracy_anatomy": 0.07142857142857142,
|
35363 |
+
"mmlu_eval_accuracy_astronomy": 0.3125,
|
35364 |
+
"mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
|
35365 |
+
"mmlu_loss": 3.9258560848236086,
|
35366 |
+
"step": 5800
|
35367 |
+
},
|
35368 |
+
{
|
35369 |
+
"epoch": 0.04,
|
35370 |
+
"learning_rate": 0.0004,
|
35371 |
+
"loss": 6.7963,
|
35372 |
+
"step": 5801
|
35373 |
+
},
|
35374 |
+
{
|
35375 |
+
"epoch": 0.04,
|
35376 |
+
"learning_rate": 0.0004,
|
35377 |
+
"loss": 7.8511,
|
35378 |
+
"step": 5802
|
35379 |
+
},
|
35380 |
+
{
|
35381 |
+
"epoch": 0.04,
|
35382 |
+
"learning_rate": 0.0004,
|
35383 |
+
"loss": 8.0362,
|
35384 |
+
"step": 5803
|
35385 |
+
},
|
35386 |
+
{
|
35387 |
+
"epoch": 0.04,
|
35388 |
+
"learning_rate": 0.0004,
|
35389 |
+
"loss": 7.8104,
|
35390 |
+
"step": 5804
|
35391 |
+
},
|
35392 |
+
{
|
35393 |
+
"epoch": 0.04,
|
35394 |
+
"learning_rate": 0.0004,
|
35395 |
+
"loss": 3.5734,
|
35396 |
+
"step": 5805
|
35397 |
+
},
|
35398 |
+
{
|
35399 |
+
"epoch": 0.04,
|
35400 |
+
"learning_rate": 0.0004,
|
35401 |
+
"loss": 7.0506,
|
35402 |
+
"step": 5806
|
35403 |
+
},
|
35404 |
+
{
|
35405 |
+
"epoch": 0.04,
|
35406 |
+
"learning_rate": 0.0004,
|
35407 |
+
"loss": 7.656,
|
35408 |
+
"step": 5807
|
35409 |
+
},
|
35410 |
+
{
|
35411 |
+
"epoch": 0.04,
|
35412 |
+
"learning_rate": 0.0004,
|
35413 |
+
"loss": 6.3932,
|
35414 |
+
"step": 5808
|
35415 |
+
},
|
35416 |
+
{
|
35417 |
+
"epoch": 0.04,
|
35418 |
+
"learning_rate": 0.0004,
|
35419 |
+
"loss": 3.8245,
|
35420 |
+
"step": 5809
|
35421 |
+
},
|
35422 |
+
{
|
35423 |
+
"epoch": 0.04,
|
35424 |
+
"learning_rate": 0.0004,
|
35425 |
+
"loss": 7.633,
|
35426 |
+
"step": 5810
|
35427 |
+
},
|
35428 |
+
{
|
35429 |
+
"epoch": 0.04,
|
35430 |
+
"learning_rate": 0.0004,
|
35431 |
+
"loss": 8.2002,
|
35432 |
+
"step": 5811
|
35433 |
+
},
|
35434 |
+
{
|
35435 |
+
"epoch": 0.04,
|
35436 |
+
"learning_rate": 0.0004,
|
35437 |
+
"loss": 7.415,
|
35438 |
+
"step": 5812
|
35439 |
+
},
|
35440 |
+
{
|
35441 |
+
"epoch": 0.04,
|
35442 |
+
"learning_rate": 0.0004,
|
35443 |
+
"loss": 8.3959,
|
35444 |
+
"step": 5813
|
35445 |
+
},
|
35446 |
+
{
|
35447 |
+
"epoch": 0.04,
|
35448 |
+
"learning_rate": 0.0004,
|
35449 |
+
"loss": 5.0557,
|
35450 |
+
"step": 5814
|
35451 |
+
},
|
35452 |
+
{
|
35453 |
+
"epoch": 0.04,
|
35454 |
+
"learning_rate": 0.0004,
|
35455 |
+
"loss": 5.5936,
|
35456 |
+
"step": 5815
|
35457 |
+
},
|
35458 |
+
{
|
35459 |
+
"epoch": 0.04,
|
35460 |
+
"learning_rate": 0.0004,
|
35461 |
+
"loss": 5.6552,
|
35462 |
+
"step": 5816
|
35463 |
+
},
|
35464 |
+
{
|
35465 |
+
"epoch": 0.04,
|
35466 |
+
"learning_rate": 0.0004,
|
35467 |
+
"loss": 6.4557,
|
35468 |
+
"step": 5817
|
35469 |
+
},
|
35470 |
+
{
|
35471 |
+
"epoch": 0.04,
|
35472 |
+
"learning_rate": 0.0004,
|
35473 |
+
"loss": 3.4525,
|
35474 |
+
"step": 5818
|
35475 |
+
},
|
35476 |
+
{
|
35477 |
+
"epoch": 0.04,
|
35478 |
+
"learning_rate": 0.0004,
|
35479 |
+
"loss": 5.2712,
|
35480 |
+
"step": 5819
|
35481 |
+
},
|
35482 |
+
{
|
35483 |
+
"epoch": 0.04,
|
35484 |
+
"learning_rate": 0.0004,
|
35485 |
+
"loss": 6.5788,
|
35486 |
+
"step": 5820
|
35487 |
+
},
|
35488 |
+
{
|
35489 |
+
"epoch": 0.04,
|
35490 |
+
"learning_rate": 0.0004,
|
35491 |
+
"loss": 3.0075,
|
35492 |
+
"step": 5821
|
35493 |
+
},
|
35494 |
+
{
|
35495 |
+
"epoch": 0.04,
|
35496 |
+
"learning_rate": 0.0004,
|
35497 |
+
"loss": 3.6125,
|
35498 |
+
"step": 5822
|
35499 |
+
},
|
35500 |
+
{
|
35501 |
+
"epoch": 0.04,
|
35502 |
+
"learning_rate": 0.0004,
|
35503 |
+
"loss": 6.3804,
|
35504 |
+
"step": 5823
|
35505 |
+
},
|
35506 |
+
{
|
35507 |
+
"epoch": 0.04,
|
35508 |
+
"learning_rate": 0.0004,
|
35509 |
+
"loss": 6.4267,
|
35510 |
+
"step": 5824
|
35511 |
+
},
|
35512 |
+
{
|
35513 |
+
"epoch": 0.04,
|
35514 |
+
"learning_rate": 0.0004,
|
35515 |
+
"loss": 2.6356,
|
35516 |
+
"step": 5825
|
35517 |
+
},
|
35518 |
+
{
|
35519 |
+
"epoch": 0.04,
|
35520 |
+
"learning_rate": 0.0004,
|
35521 |
+
"loss": 3.2399,
|
35522 |
+
"step": 5826
|
35523 |
+
},
|
35524 |
+
{
|
35525 |
+
"epoch": 0.04,
|
35526 |
+
"learning_rate": 0.0004,
|
35527 |
+
"loss": 3.8583,
|
35528 |
+
"step": 5827
|
35529 |
+
},
|
35530 |
+
{
|
35531 |
+
"epoch": 0.04,
|
35532 |
+
"learning_rate": 0.0004,
|
35533 |
+
"loss": 7.3494,
|
35534 |
+
"step": 5828
|
35535 |
+
},
|
35536 |
+
{
|
35537 |
+
"epoch": 0.04,
|
35538 |
+
"learning_rate": 0.0004,
|
35539 |
+
"loss": 8.0112,
|
35540 |
+
"step": 5829
|
35541 |
+
},
|
35542 |
+
{
|
35543 |
+
"epoch": 0.04,
|
35544 |
+
"learning_rate": 0.0004,
|
35545 |
+
"loss": 8.23,
|
35546 |
+
"step": 5830
|
35547 |
+
},
|
35548 |
+
{
|
35549 |
+
"epoch": 0.04,
|
35550 |
+
"learning_rate": 0.0004,
|
35551 |
+
"loss": 3.9515,
|
35552 |
+
"step": 5831
|
35553 |
+
},
|
35554 |
+
{
|
35555 |
+
"epoch": 0.04,
|
35556 |
+
"learning_rate": 0.0004,
|
35557 |
+
"loss": 2.8647,
|
35558 |
+
"step": 5832
|
35559 |
+
},
|
35560 |
+
{
|
35561 |
+
"epoch": 0.04,
|
35562 |
+
"learning_rate": 0.0004,
|
35563 |
+
"loss": 5.9658,
|
35564 |
+
"step": 5833
|
35565 |
+
},
|
35566 |
+
{
|
35567 |
+
"epoch": 0.04,
|
35568 |
+
"learning_rate": 0.0004,
|
35569 |
+
"loss": 5.105,
|
35570 |
+
"step": 5834
|
35571 |
+
},
|
35572 |
+
{
|
35573 |
+
"epoch": 0.04,
|
35574 |
+
"learning_rate": 0.0004,
|
35575 |
+
"loss": 7.1376,
|
35576 |
+
"step": 5835
|
35577 |
+
},
|
35578 |
+
{
|
35579 |
+
"epoch": 0.04,
|
35580 |
+
"learning_rate": 0.0004,
|
35581 |
+
"loss": 3.6733,
|
35582 |
+
"step": 5836
|
35583 |
+
},
|
35584 |
+
{
|
35585 |
+
"epoch": 0.04,
|
35586 |
+
"learning_rate": 0.0004,
|
35587 |
+
"loss": 6.3152,
|
35588 |
+
"step": 5837
|
35589 |
+
},
|
35590 |
+
{
|
35591 |
+
"epoch": 0.04,
|
35592 |
+
"learning_rate": 0.0004,
|
35593 |
+
"loss": 5.0539,
|
35594 |
+
"step": 5838
|
35595 |
+
},
|
35596 |
+
{
|
35597 |
+
"epoch": 0.04,
|
35598 |
+
"learning_rate": 0.0004,
|
35599 |
+
"loss": 4.3399,
|
35600 |
+
"step": 5839
|
35601 |
+
},
|
35602 |
+
{
|
35603 |
+
"epoch": 0.04,
|
35604 |
+
"learning_rate": 0.0004,
|
35605 |
+
"loss": 6.6373,
|
35606 |
+
"step": 5840
|
35607 |
+
},
|
35608 |
+
{
|
35609 |
+
"epoch": 0.04,
|
35610 |
+
"learning_rate": 0.0004,
|
35611 |
+
"loss": 2.2022,
|
35612 |
+
"step": 5841
|
35613 |
+
},
|
35614 |
+
{
|
35615 |
+
"epoch": 0.04,
|
35616 |
+
"learning_rate": 0.0004,
|
35617 |
+
"loss": 4.2046,
|
35618 |
+
"step": 5842
|
35619 |
+
},
|
35620 |
+
{
|
35621 |
+
"epoch": 0.04,
|
35622 |
+
"learning_rate": 0.0004,
|
35623 |
+
"loss": 7.3559,
|
35624 |
+
"step": 5843
|
35625 |
+
},
|
35626 |
+
{
|
35627 |
+
"epoch": 0.04,
|
35628 |
+
"learning_rate": 0.0004,
|
35629 |
+
"loss": 5.4401,
|
35630 |
+
"step": 5844
|
35631 |
+
},
|
35632 |
+
{
|
35633 |
+
"epoch": 0.04,
|
35634 |
+
"learning_rate": 0.0004,
|
35635 |
+
"loss": 2.1425,
|
35636 |
+
"step": 5845
|
35637 |
+
},
|
35638 |
+
{
|
35639 |
+
"epoch": 0.04,
|
35640 |
+
"learning_rate": 0.0004,
|
35641 |
+
"loss": 8.926,
|
35642 |
+
"step": 5846
|
35643 |
+
},
|
35644 |
+
{
|
35645 |
+
"epoch": 0.04,
|
35646 |
+
"learning_rate": 0.0004,
|
35647 |
+
"loss": 6.7124,
|
35648 |
+
"step": 5847
|
35649 |
+
},
|
35650 |
+
{
|
35651 |
+
"epoch": 0.04,
|
35652 |
+
"learning_rate": 0.0004,
|
35653 |
+
"loss": 4.5364,
|
35654 |
+
"step": 5848
|
35655 |
+
},
|
35656 |
+
{
|
35657 |
+
"epoch": 0.04,
|
35658 |
+
"learning_rate": 0.0004,
|
35659 |
+
"loss": 4.0998,
|
35660 |
+
"step": 5849
|
35661 |
+
},
|
35662 |
+
{
|
35663 |
+
"epoch": 0.04,
|
35664 |
+
"learning_rate": 0.0004,
|
35665 |
+
"loss": 4.5025,
|
35666 |
+
"step": 5850
|
35667 |
+
},
|
35668 |
+
{
|
35669 |
+
"epoch": 0.04,
|
35670 |
+
"learning_rate": 0.0004,
|
35671 |
+
"loss": 3.0973,
|
35672 |
+
"step": 5851
|
35673 |
+
},
|
35674 |
+
{
|
35675 |
+
"epoch": 0.04,
|
35676 |
+
"learning_rate": 0.0004,
|
35677 |
+
"loss": 8.7097,
|
35678 |
+
"step": 5852
|
35679 |
+
},
|
35680 |
+
{
|
35681 |
+
"epoch": 0.04,
|
35682 |
+
"learning_rate": 0.0004,
|
35683 |
+
"loss": 3.1209,
|
35684 |
+
"step": 5853
|
35685 |
+
},
|
35686 |
+
{
|
35687 |
+
"epoch": 0.04,
|
35688 |
+
"learning_rate": 0.0004,
|
35689 |
+
"loss": 7.3284,
|
35690 |
+
"step": 5854
|
35691 |
+
},
|
35692 |
+
{
|
35693 |
+
"epoch": 0.04,
|
35694 |
+
"learning_rate": 0.0004,
|
35695 |
+
"loss": 5.096,
|
35696 |
+
"step": 5855
|
35697 |
+
},
|
35698 |
+
{
|
35699 |
+
"epoch": 0.04,
|
35700 |
+
"learning_rate": 0.0004,
|
35701 |
+
"loss": 5.7432,
|
35702 |
+
"step": 5856
|
35703 |
+
},
|
35704 |
+
{
|
35705 |
+
"epoch": 0.04,
|
35706 |
+
"learning_rate": 0.0004,
|
35707 |
+
"loss": 7.9329,
|
35708 |
+
"step": 5857
|
35709 |
+
},
|
35710 |
+
{
|
35711 |
+
"epoch": 0.04,
|
35712 |
+
"learning_rate": 0.0004,
|
35713 |
+
"loss": 3.5233,
|
35714 |
+
"step": 5858
|
35715 |
+
},
|
35716 |
+
{
|
35717 |
+
"epoch": 0.04,
|
35718 |
+
"learning_rate": 0.0004,
|
35719 |
+
"loss": 2.4872,
|
35720 |
+
"step": 5859
|
35721 |
+
},
|
35722 |
+
{
|
35723 |
+
"epoch": 0.04,
|
35724 |
+
"learning_rate": 0.0004,
|
35725 |
+
"loss": 8.2481,
|
35726 |
+
"step": 5860
|
35727 |
+
},
|
35728 |
+
{
|
35729 |
+
"epoch": 0.04,
|
35730 |
+
"learning_rate": 0.0004,
|
35731 |
+
"loss": 3.1908,
|
35732 |
+
"step": 5861
|
35733 |
+
},
|
35734 |
+
{
|
35735 |
+
"epoch": 0.04,
|
35736 |
+
"learning_rate": 0.0004,
|
35737 |
+
"loss": 7.7033,
|
35738 |
+
"step": 5862
|
35739 |
+
},
|
35740 |
+
{
|
35741 |
+
"epoch": 0.04,
|
35742 |
+
"learning_rate": 0.0004,
|
35743 |
+
"loss": 6.8059,
|
35744 |
+
"step": 5863
|
35745 |
+
},
|
35746 |
+
{
|
35747 |
+
"epoch": 0.04,
|
35748 |
+
"learning_rate": 0.0004,
|
35749 |
+
"loss": 4.1783,
|
35750 |
+
"step": 5864
|
35751 |
+
},
|
35752 |
+
{
|
35753 |
+
"epoch": 0.04,
|
35754 |
+
"learning_rate": 0.0004,
|
35755 |
+
"loss": 2.6015,
|
35756 |
+
"step": 5865
|
35757 |
+
},
|
35758 |
+
{
|
35759 |
+
"epoch": 0.04,
|
35760 |
+
"learning_rate": 0.0004,
|
35761 |
+
"loss": 5.8913,
|
35762 |
+
"step": 5866
|
35763 |
+
},
|
35764 |
+
{
|
35765 |
+
"epoch": 0.04,
|
35766 |
+
"learning_rate": 0.0004,
|
35767 |
+
"loss": 4.0391,
|
35768 |
+
"step": 5867
|
35769 |
+
},
|
35770 |
+
{
|
35771 |
+
"epoch": 0.04,
|
35772 |
+
"learning_rate": 0.0004,
|
35773 |
+
"loss": 6.6047,
|
35774 |
+
"step": 5868
|
35775 |
+
},
|
35776 |
+
{
|
35777 |
+
"epoch": 0.04,
|
35778 |
+
"learning_rate": 0.0004,
|
35779 |
+
"loss": 4.9347,
|
35780 |
+
"step": 5869
|
35781 |
+
},
|
35782 |
+
{
|
35783 |
+
"epoch": 0.04,
|
35784 |
+
"learning_rate": 0.0004,
|
35785 |
+
"loss": 7.2305,
|
35786 |
+
"step": 5870
|
35787 |
+
},
|
35788 |
+
{
|
35789 |
+
"epoch": 0.04,
|
35790 |
+
"learning_rate": 0.0004,
|
35791 |
+
"loss": 5.8909,
|
35792 |
+
"step": 5871
|
35793 |
+
},
|
35794 |
+
{
|
35795 |
+
"epoch": 0.04,
|
35796 |
+
"learning_rate": 0.0004,
|
35797 |
+
"loss": 5.762,
|
35798 |
+
"step": 5872
|
35799 |
+
},
|
35800 |
+
{
|
35801 |
+
"epoch": 0.04,
|
35802 |
+
"learning_rate": 0.0004,
|
35803 |
+
"loss": 7.7704,
|
35804 |
+
"step": 5873
|
35805 |
+
},
|
35806 |
+
{
|
35807 |
+
"epoch": 0.04,
|
35808 |
+
"learning_rate": 0.0004,
|
35809 |
+
"loss": 4.8633,
|
35810 |
+
"step": 5874
|
35811 |
+
},
|
35812 |
+
{
|
35813 |
+
"epoch": 0.04,
|
35814 |
+
"learning_rate": 0.0004,
|
35815 |
+
"loss": 6.6627,
|
35816 |
+
"step": 5875
|
35817 |
+
},
|
35818 |
+
{
|
35819 |
+
"epoch": 0.04,
|
35820 |
+
"learning_rate": 0.0004,
|
35821 |
+
"loss": 7.5499,
|
35822 |
+
"step": 5876
|
35823 |
+
},
|
35824 |
+
{
|
35825 |
+
"epoch": 0.04,
|
35826 |
+
"learning_rate": 0.0004,
|
35827 |
+
"loss": 6.6472,
|
35828 |
+
"step": 5877
|
35829 |
+
},
|
35830 |
+
{
|
35831 |
+
"epoch": 0.04,
|
35832 |
+
"learning_rate": 0.0004,
|
35833 |
+
"loss": 7.4914,
|
35834 |
+
"step": 5878
|
35835 |
+
},
|
35836 |
+
{
|
35837 |
+
"epoch": 0.04,
|
35838 |
+
"learning_rate": 0.0004,
|
35839 |
+
"loss": 3.9684,
|
35840 |
+
"step": 5879
|
35841 |
+
},
|
35842 |
+
{
|
35843 |
+
"epoch": 0.04,
|
35844 |
+
"learning_rate": 0.0004,
|
35845 |
+
"loss": 6.903,
|
35846 |
+
"step": 5880
|
35847 |
+
},
|
35848 |
+
{
|
35849 |
+
"epoch": 0.04,
|
35850 |
+
"learning_rate": 0.0004,
|
35851 |
+
"loss": 6.4157,
|
35852 |
+
"step": 5881
|
35853 |
+
},
|
35854 |
+
{
|
35855 |
+
"epoch": 0.04,
|
35856 |
+
"learning_rate": 0.0004,
|
35857 |
+
"loss": 6.2942,
|
35858 |
+
"step": 5882
|
35859 |
+
},
|
35860 |
+
{
|
35861 |
+
"epoch": 0.04,
|
35862 |
+
"learning_rate": 0.0004,
|
35863 |
+
"loss": 2.6608,
|
35864 |
+
"step": 5883
|
35865 |
+
},
|
35866 |
+
{
|
35867 |
+
"epoch": 0.04,
|
35868 |
+
"learning_rate": 0.0004,
|
35869 |
+
"loss": 5.3432,
|
35870 |
+
"step": 5884
|
35871 |
+
},
|
35872 |
+
{
|
35873 |
+
"epoch": 0.04,
|
35874 |
+
"learning_rate": 0.0004,
|
35875 |
+
"loss": 2.9234,
|
35876 |
+
"step": 5885
|
35877 |
+
},
|
35878 |
+
{
|
35879 |
+
"epoch": 0.04,
|
35880 |
+
"learning_rate": 0.0004,
|
35881 |
+
"loss": 6.6604,
|
35882 |
+
"step": 5886
|
35883 |
+
},
|
35884 |
+
{
|
35885 |
+
"epoch": 0.04,
|
35886 |
+
"learning_rate": 0.0004,
|
35887 |
+
"loss": 2.7875,
|
35888 |
+
"step": 5887
|
35889 |
+
},
|
35890 |
+
{
|
35891 |
+
"epoch": 0.04,
|
35892 |
+
"learning_rate": 0.0004,
|
35893 |
+
"loss": 5.4057,
|
35894 |
+
"step": 5888
|
35895 |
+
},
|
35896 |
+
{
|
35897 |
+
"epoch": 0.04,
|
35898 |
+
"learning_rate": 0.0004,
|
35899 |
+
"loss": 2.352,
|
35900 |
+
"step": 5889
|
35901 |
+
},
|
35902 |
+
{
|
35903 |
+
"epoch": 0.04,
|
35904 |
+
"learning_rate": 0.0004,
|
35905 |
+
"loss": 2.2785,
|
35906 |
+
"step": 5890
|
35907 |
+
},
|
35908 |
+
{
|
35909 |
+
"epoch": 0.05,
|
35910 |
+
"learning_rate": 0.0004,
|
35911 |
+
"loss": 3.4352,
|
35912 |
+
"step": 5891
|
35913 |
+
},
|
35914 |
+
{
|
35915 |
+
"epoch": 0.05,
|
35916 |
+
"learning_rate": 0.0004,
|
35917 |
+
"loss": 5.6623,
|
35918 |
+
"step": 5892
|
35919 |
+
},
|
35920 |
+
{
|
35921 |
+
"epoch": 0.05,
|
35922 |
+
"learning_rate": 0.0004,
|
35923 |
+
"loss": 2.8201,
|
35924 |
+
"step": 5893
|
35925 |
+
},
|
35926 |
+
{
|
35927 |
+
"epoch": 0.05,
|
35928 |
+
"learning_rate": 0.0004,
|
35929 |
+
"loss": 6.2894,
|
35930 |
+
"step": 5894
|
35931 |
+
},
|
35932 |
+
{
|
35933 |
+
"epoch": 0.05,
|
35934 |
+
"learning_rate": 0.0004,
|
35935 |
+
"loss": 7.0568,
|
35936 |
+
"step": 5895
|
35937 |
+
},
|
35938 |
+
{
|
35939 |
+
"epoch": 0.05,
|
35940 |
+
"learning_rate": 0.0004,
|
35941 |
+
"loss": 9.1368,
|
35942 |
+
"step": 5896
|
35943 |
+
},
|
35944 |
+
{
|
35945 |
+
"epoch": 0.05,
|
35946 |
+
"learning_rate": 0.0004,
|
35947 |
+
"loss": 4.4088,
|
35948 |
+
"step": 5897
|
35949 |
+
},
|
35950 |
+
{
|
35951 |
+
"epoch": 0.05,
|
35952 |
+
"learning_rate": 0.0004,
|
35953 |
+
"loss": 6.5719,
|
35954 |
+
"step": 5898
|
35955 |
+
},
|
35956 |
+
{
|
35957 |
+
"epoch": 0.05,
|
35958 |
+
"learning_rate": 0.0004,
|
35959 |
+
"loss": 7.4458,
|
35960 |
+
"step": 5899
|
35961 |
+
},
|
35962 |
+
{
|
35963 |
+
"epoch": 0.05,
|
35964 |
+
"learning_rate": 0.0004,
|
35965 |
+
"loss": 6.4525,
|
35966 |
+
"step": 5900
|
35967 |
+
},
|
35968 |
+
{
|
35969 |
+
"epoch": 0.05,
|
35970 |
+
"learning_rate": 0.0004,
|
35971 |
+
"loss": 8.5376,
|
35972 |
+
"step": 5901
|
35973 |
+
},
|
35974 |
+
{
|
35975 |
+
"epoch": 0.05,
|
35976 |
+
"learning_rate": 0.0004,
|
35977 |
+
"loss": 6.6726,
|
35978 |
+
"step": 5902
|
35979 |
+
},
|
35980 |
+
{
|
35981 |
+
"epoch": 0.05,
|
35982 |
+
"learning_rate": 0.0004,
|
35983 |
+
"loss": 7.8266,
|
35984 |
+
"step": 5903
|
35985 |
+
},
|
35986 |
+
{
|
35987 |
+
"epoch": 0.05,
|
35988 |
+
"learning_rate": 0.0004,
|
35989 |
+
"loss": 6.7965,
|
35990 |
+
"step": 5904
|
35991 |
+
},
|
35992 |
+
{
|
35993 |
+
"epoch": 0.05,
|
35994 |
+
"learning_rate": 0.0004,
|
35995 |
+
"loss": 4.8672,
|
35996 |
+
"step": 5905
|
35997 |
+
},
|
35998 |
+
{
|
35999 |
+
"epoch": 0.05,
|
36000 |
+
"learning_rate": 0.0004,
|
36001 |
+
"loss": 3.2546,
|
36002 |
+
"step": 5906
|
36003 |
+
},
|
36004 |
+
{
|
36005 |
+
"epoch": 0.05,
|
36006 |
+
"learning_rate": 0.0004,
|
36007 |
+
"loss": 7.9688,
|
36008 |
+
"step": 5907
|
36009 |
+
},
|
36010 |
+
{
|
36011 |
+
"epoch": 0.05,
|
36012 |
+
"learning_rate": 0.0004,
|
36013 |
+
"loss": 7.4705,
|
36014 |
+
"step": 5908
|
36015 |
+
},
|
36016 |
+
{
|
36017 |
+
"epoch": 0.05,
|
36018 |
+
"learning_rate": 0.0004,
|
36019 |
+
"loss": 3.4179,
|
36020 |
+
"step": 5909
|
36021 |
+
},
|
36022 |
+
{
|
36023 |
+
"epoch": 0.05,
|
36024 |
+
"learning_rate": 0.0004,
|
36025 |
+
"loss": 7.4204,
|
36026 |
+
"step": 5910
|
36027 |
+
},
|
36028 |
+
{
|
36029 |
+
"epoch": 0.05,
|
36030 |
+
"learning_rate": 0.0004,
|
36031 |
+
"loss": 7.7445,
|
36032 |
+
"step": 5911
|
36033 |
+
},
|
36034 |
+
{
|
36035 |
+
"epoch": 0.05,
|
36036 |
+
"learning_rate": 0.0004,
|
36037 |
+
"loss": 6.9589,
|
36038 |
+
"step": 5912
|
36039 |
+
},
|
36040 |
+
{
|
36041 |
+
"epoch": 0.05,
|
36042 |
+
"learning_rate": 0.0004,
|
36043 |
+
"loss": 7.5771,
|
36044 |
+
"step": 5913
|
36045 |
+
},
|
36046 |
+
{
|
36047 |
+
"epoch": 0.05,
|
36048 |
+
"learning_rate": 0.0004,
|
36049 |
+
"loss": 9.1289,
|
36050 |
+
"step": 5914
|
36051 |
+
},
|
36052 |
+
{
|
36053 |
+
"epoch": 0.05,
|
36054 |
+
"learning_rate": 0.0004,
|
36055 |
+
"loss": 4.0564,
|
36056 |
+
"step": 5915
|
36057 |
+
},
|
36058 |
+
{
|
36059 |
+
"epoch": 0.05,
|
36060 |
+
"learning_rate": 0.0004,
|
36061 |
+
"loss": 7.5045,
|
36062 |
+
"step": 5916
|
36063 |
+
},
|
36064 |
+
{
|
36065 |
+
"epoch": 0.05,
|
36066 |
+
"learning_rate": 0.0004,
|
36067 |
+
"loss": 6.6193,
|
36068 |
+
"step": 5917
|
36069 |
+
},
|
36070 |
+
{
|
36071 |
+
"epoch": 0.05,
|
36072 |
+
"learning_rate": 0.0004,
|
36073 |
+
"loss": 2.9347,
|
36074 |
+
"step": 5918
|
36075 |
+
},
|
36076 |
+
{
|
36077 |
+
"epoch": 0.05,
|
36078 |
+
"learning_rate": 0.0004,
|
36079 |
+
"loss": 7.276,
|
36080 |
+
"step": 5919
|
36081 |
+
},
|
36082 |
+
{
|
36083 |
+
"epoch": 0.05,
|
36084 |
+
"learning_rate": 0.0004,
|
36085 |
+
"loss": 6.0243,
|
36086 |
+
"step": 5920
|
36087 |
+
},
|
36088 |
+
{
|
36089 |
+
"epoch": 0.05,
|
36090 |
+
"learning_rate": 0.0004,
|
36091 |
+
"loss": 8.8889,
|
36092 |
+
"step": 5921
|
36093 |
+
},
|
36094 |
+
{
|
36095 |
+
"epoch": 0.05,
|
36096 |
+
"learning_rate": 0.0004,
|
36097 |
+
"loss": 4.8016,
|
36098 |
+
"step": 5922
|
36099 |
+
},
|
36100 |
+
{
|
36101 |
+
"epoch": 0.05,
|
36102 |
+
"learning_rate": 0.0004,
|
36103 |
+
"loss": 7.6244,
|
36104 |
+
"step": 5923
|
36105 |
+
},
|
36106 |
+
{
|
36107 |
+
"epoch": 0.05,
|
36108 |
+
"learning_rate": 0.0004,
|
36109 |
+
"loss": 4.6548,
|
36110 |
+
"step": 5924
|
36111 |
+
},
|
36112 |
+
{
|
36113 |
+
"epoch": 0.05,
|
36114 |
+
"learning_rate": 0.0004,
|
36115 |
+
"loss": 5.446,
|
36116 |
+
"step": 5925
|
36117 |
+
},
|
36118 |
+
{
|
36119 |
+
"epoch": 0.05,
|
36120 |
+
"learning_rate": 0.0004,
|
36121 |
+
"loss": 3.0701,
|
36122 |
+
"step": 5926
|
36123 |
+
},
|
36124 |
+
{
|
36125 |
+
"epoch": 0.05,
|
36126 |
+
"learning_rate": 0.0004,
|
36127 |
+
"loss": 3.6489,
|
36128 |
+
"step": 5927
|
36129 |
+
},
|
36130 |
+
{
|
36131 |
+
"epoch": 0.05,
|
36132 |
+
"learning_rate": 0.0004,
|
36133 |
+
"loss": 6.8636,
|
36134 |
+
"step": 5928
|
36135 |
+
},
|
36136 |
+
{
|
36137 |
+
"epoch": 0.05,
|
36138 |
+
"learning_rate": 0.0004,
|
36139 |
+
"loss": 7.3796,
|
36140 |
+
"step": 5929
|
36141 |
+
},
|
36142 |
+
{
|
36143 |
+
"epoch": 0.05,
|
36144 |
+
"learning_rate": 0.0004,
|
36145 |
+
"loss": 6.3366,
|
36146 |
+
"step": 5930
|
36147 |
+
},
|
36148 |
+
{
|
36149 |
+
"epoch": 0.05,
|
36150 |
+
"learning_rate": 0.0004,
|
36151 |
+
"loss": 7.4844,
|
36152 |
+
"step": 5931
|
36153 |
+
},
|
36154 |
+
{
|
36155 |
+
"epoch": 0.05,
|
36156 |
+
"learning_rate": 0.0004,
|
36157 |
+
"loss": 5.5549,
|
36158 |
+
"step": 5932
|
36159 |
+
},
|
36160 |
+
{
|
36161 |
+
"epoch": 0.05,
|
36162 |
+
"learning_rate": 0.0004,
|
36163 |
+
"loss": 7.976,
|
36164 |
+
"step": 5933
|
36165 |
+
},
|
36166 |
+
{
|
36167 |
+
"epoch": 0.05,
|
36168 |
+
"learning_rate": 0.0004,
|
36169 |
+
"loss": 7.0844,
|
36170 |
+
"step": 5934
|
36171 |
+
},
|
36172 |
+
{
|
36173 |
+
"epoch": 0.05,
|
36174 |
+
"learning_rate": 0.0004,
|
36175 |
+
"loss": 3.5849,
|
36176 |
+
"step": 5935
|
36177 |
+
},
|
36178 |
+
{
|
36179 |
+
"epoch": 0.05,
|
36180 |
+
"learning_rate": 0.0004,
|
36181 |
+
"loss": 6.5648,
|
36182 |
+
"step": 5936
|
36183 |
+
},
|
36184 |
+
{
|
36185 |
+
"epoch": 0.05,
|
36186 |
+
"learning_rate": 0.0004,
|
36187 |
+
"loss": 6.8267,
|
36188 |
+
"step": 5937
|
36189 |
+
},
|
36190 |
+
{
|
36191 |
+
"epoch": 0.05,
|
36192 |
+
"learning_rate": 0.0004,
|
36193 |
+
"loss": 4.0671,
|
36194 |
+
"step": 5938
|
36195 |
+
},
|
36196 |
+
{
|
36197 |
+
"epoch": 0.05,
|
36198 |
+
"learning_rate": 0.0004,
|
36199 |
+
"loss": 5.9199,
|
36200 |
+
"step": 5939
|
36201 |
+
},
|
36202 |
+
{
|
36203 |
+
"epoch": 0.05,
|
36204 |
+
"learning_rate": 0.0004,
|
36205 |
+
"loss": 6.7518,
|
36206 |
+
"step": 5940
|
36207 |
+
},
|
36208 |
+
{
|
36209 |
+
"epoch": 0.05,
|
36210 |
+
"learning_rate": 0.0004,
|
36211 |
+
"loss": 2.9931,
|
36212 |
+
"step": 5941
|
36213 |
+
},
|
36214 |
+
{
|
36215 |
+
"epoch": 0.05,
|
36216 |
+
"learning_rate": 0.0004,
|
36217 |
+
"loss": 4.1515,
|
36218 |
+
"step": 5942
|
36219 |
+
},
|
36220 |
+
{
|
36221 |
+
"epoch": 0.05,
|
36222 |
+
"learning_rate": 0.0004,
|
36223 |
+
"loss": 5.4225,
|
36224 |
+
"step": 5943
|
36225 |
+
},
|
36226 |
+
{
|
36227 |
+
"epoch": 0.05,
|
36228 |
+
"learning_rate": 0.0004,
|
36229 |
+
"loss": 4.7662,
|
36230 |
+
"step": 5944
|
36231 |
+
},
|
36232 |
+
{
|
36233 |
+
"epoch": 0.05,
|
36234 |
+
"learning_rate": 0.0004,
|
36235 |
+
"loss": 4.7916,
|
36236 |
+
"step": 5945
|
36237 |
+
},
|
36238 |
+
{
|
36239 |
+
"epoch": 0.05,
|
36240 |
+
"learning_rate": 0.0004,
|
36241 |
+
"loss": 5.6711,
|
36242 |
+
"step": 5946
|
36243 |
+
},
|
36244 |
+
{
|
36245 |
+
"epoch": 0.05,
|
36246 |
+
"learning_rate": 0.0004,
|
36247 |
+
"loss": 6.4338,
|
36248 |
+
"step": 5947
|
36249 |
+
},
|
36250 |
+
{
|
36251 |
+
"epoch": 0.05,
|
36252 |
+
"learning_rate": 0.0004,
|
36253 |
+
"loss": 6.1612,
|
36254 |
+
"step": 5948
|
36255 |
+
},
|
36256 |
+
{
|
36257 |
+
"epoch": 0.05,
|
36258 |
+
"learning_rate": 0.0004,
|
36259 |
+
"loss": 4.3135,
|
36260 |
+
"step": 5949
|
36261 |
+
},
|
36262 |
+
{
|
36263 |
+
"epoch": 0.05,
|
36264 |
+
"learning_rate": 0.0004,
|
36265 |
+
"loss": 5.6296,
|
36266 |
+
"step": 5950
|
36267 |
+
},
|
36268 |
+
{
|
36269 |
+
"epoch": 0.05,
|
36270 |
+
"learning_rate": 0.0004,
|
36271 |
+
"loss": 8.2795,
|
36272 |
+
"step": 5951
|
36273 |
+
},
|
36274 |
+
{
|
36275 |
+
"epoch": 0.05,
|
36276 |
+
"learning_rate": 0.0004,
|
36277 |
+
"loss": 7.2667,
|
36278 |
+
"step": 5952
|
36279 |
+
},
|
36280 |
+
{
|
36281 |
+
"epoch": 0.05,
|
36282 |
+
"learning_rate": 0.0004,
|
36283 |
+
"loss": 4.4897,
|
36284 |
+
"step": 5953
|
36285 |
+
},
|
36286 |
+
{
|
36287 |
+
"epoch": 0.05,
|
36288 |
+
"learning_rate": 0.0004,
|
36289 |
+
"loss": 3.9241,
|
36290 |
+
"step": 5954
|
36291 |
+
},
|
36292 |
+
{
|
36293 |
+
"epoch": 0.05,
|
36294 |
+
"learning_rate": 0.0004,
|
36295 |
+
"loss": 7.776,
|
36296 |
+
"step": 5955
|
36297 |
+
},
|
36298 |
+
{
|
36299 |
+
"epoch": 0.05,
|
36300 |
+
"learning_rate": 0.0004,
|
36301 |
+
"loss": 7.3649,
|
36302 |
+
"step": 5956
|
36303 |
+
},
|
36304 |
+
{
|
36305 |
+
"epoch": 0.05,
|
36306 |
+
"learning_rate": 0.0004,
|
36307 |
+
"loss": 2.6375,
|
36308 |
+
"step": 5957
|
36309 |
+
},
|
36310 |
+
{
|
36311 |
+
"epoch": 0.05,
|
36312 |
+
"learning_rate": 0.0004,
|
36313 |
+
"loss": 9.0611,
|
36314 |
+
"step": 5958
|
36315 |
+
},
|
36316 |
+
{
|
36317 |
+
"epoch": 0.05,
|
36318 |
+
"learning_rate": 0.0004,
|
36319 |
+
"loss": 6.7652,
|
36320 |
+
"step": 5959
|
36321 |
+
},
|
36322 |
+
{
|
36323 |
+
"epoch": 0.05,
|
36324 |
+
"learning_rate": 0.0004,
|
36325 |
+
"loss": 8.7396,
|
36326 |
+
"step": 5960
|
36327 |
+
},
|
36328 |
+
{
|
36329 |
+
"epoch": 0.05,
|
36330 |
+
"learning_rate": 0.0004,
|
36331 |
+
"loss": 7.8184,
|
36332 |
+
"step": 5961
|
36333 |
+
},
|
36334 |
+
{
|
36335 |
+
"epoch": 0.05,
|
36336 |
+
"learning_rate": 0.0004,
|
36337 |
+
"loss": 6.9717,
|
36338 |
+
"step": 5962
|
36339 |
+
},
|
36340 |
+
{
|
36341 |
+
"epoch": 0.05,
|
36342 |
+
"learning_rate": 0.0004,
|
36343 |
+
"loss": 6.7367,
|
36344 |
+
"step": 5963
|
36345 |
+
},
|
36346 |
+
{
|
36347 |
+
"epoch": 0.05,
|
36348 |
+
"learning_rate": 0.0004,
|
36349 |
+
"loss": 5.3137,
|
36350 |
+
"step": 5964
|
36351 |
+
},
|
36352 |
+
{
|
36353 |
+
"epoch": 0.05,
|
36354 |
+
"learning_rate": 0.0004,
|
36355 |
+
"loss": 7.5619,
|
36356 |
+
"step": 5965
|
36357 |
+
},
|
36358 |
+
{
|
36359 |
+
"epoch": 0.05,
|
36360 |
+
"learning_rate": 0.0004,
|
36361 |
+
"loss": 5.5172,
|
36362 |
+
"step": 5966
|
36363 |
+
},
|
36364 |
+
{
|
36365 |
+
"epoch": 0.05,
|
36366 |
+
"learning_rate": 0.0004,
|
36367 |
+
"loss": 7.5568,
|
36368 |
+
"step": 5967
|
36369 |
+
},
|
36370 |
+
{
|
36371 |
+
"epoch": 0.05,
|
36372 |
+
"learning_rate": 0.0004,
|
36373 |
+
"loss": 8.1321,
|
36374 |
+
"step": 5968
|
36375 |
+
},
|
36376 |
+
{
|
36377 |
+
"epoch": 0.05,
|
36378 |
+
"learning_rate": 0.0004,
|
36379 |
+
"loss": 8.8486,
|
36380 |
+
"step": 5969
|
36381 |
+
},
|
36382 |
+
{
|
36383 |
+
"epoch": 0.05,
|
36384 |
+
"learning_rate": 0.0004,
|
36385 |
+
"loss": 3.6196,
|
36386 |
+
"step": 5970
|
36387 |
+
},
|
36388 |
+
{
|
36389 |
+
"epoch": 0.05,
|
36390 |
+
"learning_rate": 0.0004,
|
36391 |
+
"loss": 7.7649,
|
36392 |
+
"step": 5971
|
36393 |
+
},
|
36394 |
+
{
|
36395 |
+
"epoch": 0.05,
|
36396 |
+
"learning_rate": 0.0004,
|
36397 |
+
"loss": 8.096,
|
36398 |
+
"step": 5972
|
36399 |
+
},
|
36400 |
+
{
|
36401 |
+
"epoch": 0.05,
|
36402 |
+
"learning_rate": 0.0004,
|
36403 |
+
"loss": 3.2377,
|
36404 |
+
"step": 5973
|
36405 |
+
},
|
36406 |
+
{
|
36407 |
+
"epoch": 0.05,
|
36408 |
+
"learning_rate": 0.0004,
|
36409 |
+
"loss": 7.9327,
|
36410 |
+
"step": 5974
|
36411 |
+
},
|
36412 |
+
{
|
36413 |
+
"epoch": 0.05,
|
36414 |
+
"learning_rate": 0.0004,
|
36415 |
+
"loss": 3.0676,
|
36416 |
+
"step": 5975
|
36417 |
+
},
|
36418 |
+
{
|
36419 |
+
"epoch": 0.05,
|
36420 |
+
"learning_rate": 0.0004,
|
36421 |
+
"loss": 6.9014,
|
36422 |
+
"step": 5976
|
36423 |
+
},
|
36424 |
+
{
|
36425 |
+
"epoch": 0.05,
|
36426 |
+
"learning_rate": 0.0004,
|
36427 |
+
"loss": 7.9241,
|
36428 |
+
"step": 5977
|
36429 |
+
},
|
36430 |
+
{
|
36431 |
+
"epoch": 0.05,
|
36432 |
+
"learning_rate": 0.0004,
|
36433 |
+
"loss": 12.1662,
|
36434 |
+
"step": 5978
|
36435 |
+
},
|
36436 |
+
{
|
36437 |
+
"epoch": 0.05,
|
36438 |
+
"learning_rate": 0.0004,
|
36439 |
+
"loss": 2.9906,
|
36440 |
+
"step": 5979
|
36441 |
+
},
|
36442 |
+
{
|
36443 |
+
"epoch": 0.05,
|
36444 |
+
"learning_rate": 0.0004,
|
36445 |
+
"loss": 4.6138,
|
36446 |
+
"step": 5980
|
36447 |
+
},
|
36448 |
+
{
|
36449 |
+
"epoch": 0.05,
|
36450 |
+
"learning_rate": 0.0004,
|
36451 |
+
"loss": 2.8328,
|
36452 |
+
"step": 5981
|
36453 |
+
},
|
36454 |
+
{
|
36455 |
+
"epoch": 0.05,
|
36456 |
+
"learning_rate": 0.0004,
|
36457 |
+
"loss": 2.6569,
|
36458 |
+
"step": 5982
|
36459 |
+
},
|
36460 |
+
{
|
36461 |
+
"epoch": 0.05,
|
36462 |
+
"learning_rate": 0.0004,
|
36463 |
+
"loss": 6.6642,
|
36464 |
+
"step": 5983
|
36465 |
+
},
|
36466 |
+
{
|
36467 |
+
"epoch": 0.05,
|
36468 |
+
"learning_rate": 0.0004,
|
36469 |
+
"loss": 4.8701,
|
36470 |
+
"step": 5984
|
36471 |
+
},
|
36472 |
+
{
|
36473 |
+
"epoch": 0.05,
|
36474 |
+
"learning_rate": 0.0004,
|
36475 |
+
"loss": 2.4972,
|
36476 |
+
"step": 5985
|
36477 |
+
},
|
36478 |
+
{
|
36479 |
+
"epoch": 0.05,
|
36480 |
+
"learning_rate": 0.0004,
|
36481 |
+
"loss": 3.1518,
|
36482 |
+
"step": 5986
|
36483 |
+
},
|
36484 |
+
{
|
36485 |
+
"epoch": 0.05,
|
36486 |
+
"learning_rate": 0.0004,
|
36487 |
+
"loss": 7.1437,
|
36488 |
+
"step": 5987
|
36489 |
+
},
|
36490 |
+
{
|
36491 |
+
"epoch": 0.05,
|
36492 |
+
"learning_rate": 0.0004,
|
36493 |
+
"loss": 6.2173,
|
36494 |
+
"step": 5988
|
36495 |
+
},
|
36496 |
+
{
|
36497 |
+
"epoch": 0.05,
|
36498 |
+
"learning_rate": 0.0004,
|
36499 |
+
"loss": 6.7305,
|
36500 |
+
"step": 5989
|
36501 |
+
},
|
36502 |
+
{
|
36503 |
+
"epoch": 0.05,
|
36504 |
+
"learning_rate": 0.0004,
|
36505 |
+
"loss": 7.6896,
|
36506 |
+
"step": 5990
|
36507 |
+
},
|
36508 |
+
{
|
36509 |
+
"epoch": 0.05,
|
36510 |
+
"learning_rate": 0.0004,
|
36511 |
+
"loss": 7.5627,
|
36512 |
+
"step": 5991
|
36513 |
+
},
|
36514 |
+
{
|
36515 |
+
"epoch": 0.05,
|
36516 |
+
"learning_rate": 0.0004,
|
36517 |
+
"loss": 4.5204,
|
36518 |
+
"step": 5992
|
36519 |
+
},
|
36520 |
+
{
|
36521 |
+
"epoch": 0.05,
|
36522 |
+
"learning_rate": 0.0004,
|
36523 |
+
"loss": 5.9454,
|
36524 |
+
"step": 5993
|
36525 |
+
},
|
36526 |
+
{
|
36527 |
+
"epoch": 0.05,
|
36528 |
+
"learning_rate": 0.0004,
|
36529 |
+
"loss": 6.4362,
|
36530 |
+
"step": 5994
|
36531 |
+
},
|
36532 |
+
{
|
36533 |
+
"epoch": 0.05,
|
36534 |
+
"learning_rate": 0.0004,
|
36535 |
+
"loss": 4.0131,
|
36536 |
+
"step": 5995
|
36537 |
+
},
|
36538 |
+
{
|
36539 |
+
"epoch": 0.05,
|
36540 |
+
"learning_rate": 0.0004,
|
36541 |
+
"loss": 6.1399,
|
36542 |
+
"step": 5996
|
36543 |
+
},
|
36544 |
+
{
|
36545 |
+
"epoch": 0.05,
|
36546 |
+
"learning_rate": 0.0004,
|
36547 |
+
"loss": 7.666,
|
36548 |
+
"step": 5997
|
36549 |
+
},
|
36550 |
+
{
|
36551 |
+
"epoch": 0.05,
|
36552 |
+
"learning_rate": 0.0004,
|
36553 |
+
"loss": 8.962,
|
36554 |
+
"step": 5998
|
36555 |
+
},
|
36556 |
+
{
|
36557 |
+
"epoch": 0.05,
|
36558 |
+
"learning_rate": 0.0004,
|
36559 |
+
"loss": 3.4282,
|
36560 |
+
"step": 5999
|
36561 |
+
},
|
36562 |
+
{
|
36563 |
+
"epoch": 0.05,
|
36564 |
+
"learning_rate": 0.0004,
|
36565 |
+
"loss": 3.7265,
|
36566 |
+
"step": 6000
|
36567 |
+
},
|
36568 |
+
{
|
36569 |
+
"epoch": 0.05,
|
36570 |
+
"eval_loss": 6.473691463470459,
|
36571 |
+
"eval_runtime": 22.3658,
|
36572 |
+
"eval_samples_per_second": 2.236,
|
36573 |
+
"eval_steps_per_second": 1.118,
|
36574 |
+
"step": 6000
|
36575 |
+
},
|
36576 |
+
{
|
36577 |
+
"epoch": 0.05,
|
36578 |
+
"mmlu_eval_accuracy": 0.2525477994227994,
|
36579 |
+
"mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
|
36580 |
+
"mmlu_eval_accuracy_anatomy": 0.07142857142857142,
|
36581 |
+
"mmlu_eval_accuracy_astronomy": 0.3125,
|
36582 |
+
"mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
|
36583 |
+
"mmlu_loss": 3.9286953735351564,
|
36584 |
+
"step": 6000
|
36585 |
+
},
|
36586 |
+
{
|
36587 |
+
"epoch": 0.05,
|
36588 |
+
"step": 6000,
|
36589 |
+
"total_flos": 9.88792958631936e+16,
|
36590 |
+
"train_loss": 0.5874443841576577,
|
36591 |
+
"train_runtime": 1725.6374,
|
36592 |
+
"train_samples_per_second": 17.385,
|
36593 |
+
"train_steps_per_second": 17.385
|
36594 |
}
|
36595 |
],
|
36596 |
"max_steps": 30000,
|
36597 |
"num_train_epochs": 1,
|
36598 |
+
"total_flos": 9.88792958631936e+16,
|
36599 |
"trial_name": null,
|
36600 |
"trial_params": null
|
36601 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6011
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85783faab59f5f6d8bcf691e35bb86cff435e22f3fa9169bf4e56c0239c8d7e4
|
3 |
size 6011
|