Commit fbd1944
Author: Farouk
Parent: f277292

    Training in progress, step 5600
Files changed:

- adapter_config.json (+4 -4)
- adapter_model.bin (+1 -1)
- all_results.json (+7 -7)
- checkpoint-4200/adapter_model/adapter_model/README.md (+24 -0)
- checkpoint-4200/adapter_model/adapter_model/adapter_model.bin (+1 -1)
- checkpoint-5600/README.md (+20 -0)
- checkpoint-5600/adapter_config.json (+26 -0)
- checkpoint-5600/adapter_model.bin (+3 -0)
- checkpoint-5600/added_tokens.json (+3 -0)
- checkpoint-5600/optimizer.pt (+3 -0)
- checkpoint-5600/rng_state.pth (+3 -0)
- checkpoint-5600/scheduler.pt (+3 -0)
- checkpoint-5600/special_tokens_map.json (+6 -0)
- checkpoint-5600/tokenizer.model (+3 -0)
- checkpoint-5600/tokenizer_config.json (+35 -0)
- checkpoint-5600/trainer_state.json (+0 -0)
- checkpoint-5600/training_args.bin (+3 -0)
- eval_results.json (+3 -3)
- metrics.json (+1 -1)
- train_results.json (+4 -4)
- trainer_state.json (+3666 -3)
adapter_config.json CHANGED

```diff
@@ -14,13 +14,13 @@
     "r": 64,
     "revision": null,
     "target_modules": [
-        "…",
+        "o_proj",
+        "k_proj",
         "down_proj",
-        "q_proj",
         "gate_proj",
-        "o_proj",
         "up_proj",
-        "…",
+        "v_proj",
+        "q_proj"
     ],
     "task_type": "CAUSAL_LM"
 }
```

(The two removed module names marked "…" are truncated in the rendered page.)
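For context on the change above: this target-module list is what PEFT serializes from a `LoraConfig`, so the edit amounts to reordering and swapping which projection layers the LoRA adapter attaches to. A minimal sketch of an equivalent config, assuming the PEFT 0.4.0 API named later in this commit (all values copied from checkpoint-5600/adapter_config.json below):

```python
from peft import LoraConfig

# Sketch only: rebuilds the adapter config shown in this diff.
config = LoraConfig(
    r=64,                 # LoRA rank
    lora_alpha=16.0,      # scaling factor (alpha / r scales the update)
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[      # Llama attention + MLP projection layers
        "o_proj", "k_proj", "down_proj", "gate_proj",
        "up_proj", "v_proj", "q_proj",
    ],
)
config.save_pretrained("out")  # writes out/adapter_config.json
```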
adapter_model.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:2a81912e202e5ef1e0abe63acfe2600dcb02f3768a37b847ef40a41ebbb64f69
 size 871609293
```

(The old oid is truncated in the rendered page.)
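Each of the `.bin`/`.pt` entries in this commit is stored through Git LFS, so the diff shows only the three-line pointer file: the spec URL, the SHA-256 of the real blob, and its size in bytes. A standard-library sketch (hypothetical helper name) that checks a downloaded blob against such a pointer:

```python
import hashlib

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against a Git LFS pointer file."""
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size
```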
all_results.json CHANGED

```diff
@@ -1,11 +1,11 @@
 {
     "epoch": 0.04,
     "eval_loss": 6.335043907165527,
-    "eval_runtime": 21.…,
-    "eval_samples_per_second": 2.…,
-    "eval_steps_per_second": 1.…,
-    "train_loss": …,
-    "train_runtime": …,
-    "train_samples_per_second": …,
-    "train_steps_per_second": …
+    "eval_runtime": 21.6378,
+    "eval_samples_per_second": 2.311,
+    "eval_steps_per_second": 1.155,
+    "train_loss": 0.6445872698006807,
+    "train_runtime": 1748.3273,
+    "train_samples_per_second": 17.159,
+    "train_steps_per_second": 17.159
 }
```

(The old values are truncated in the rendered page.)
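Since the `*_per_second` fields are plain ratios over the runtime, the size of the eval run can be recovered from them. A small sketch of that arithmetic (the file name matches this commit; the derivation is just the definition of those fields):

```python
import json

# all_results.json as committed above
with open("all_results.json") as f:
    results = json.load(f)

# samples_per_second = n_samples / runtime, so runtime * rate recovers the count.
eval_samples = results["eval_runtime"] * results["eval_samples_per_second"]
eval_steps = results["eval_runtime"] * results["eval_steps_per_second"]
print(round(eval_samples), round(eval_steps))  # 50 samples in 25 steps -> eval batch of 2
```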
checkpoint-4200/adapter_model/adapter_model/README.md CHANGED

```diff
@@ -70,6 +70,28 @@
 - bnb_4bit_use_double_quant: True
 - bnb_4bit_compute_dtype: bfloat16
 
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
 The following `bitsandbytes` quantization config was used during training:
 - load_in_8bit: False
 - load_in_4bit: True
@@ -88,5 +110,7 @@
 - PEFT 0.4.0
 - PEFT 0.4.0
 - PEFT 0.4.0
+- PEFT 0.4.0
+- PEFT 0.4.0
 
 - PEFT 0.4.0
```
checkpoint-4200/adapter_model/adapter_model/adapter_model.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:ff18c40f9b3c9fb20f1c95d4dff151244eba09eee79ae11c6121cc23181c2442
 size 871609293
```
checkpoint-5600/README.md ADDED

```
---
library_name: peft
---
## Training procedure


The following `bitsandbytes` quantization config was used during training:
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
- llm_int8_skip_modules: None
- llm_int8_enable_fp32_cpu_offload: False
- llm_int8_has_fp16_weight: False
- bnb_4bit_quant_type: nf4
- bnb_4bit_use_double_quant: True
- bnb_4bit_compute_dtype: bfloat16
### Framework versions


- PEFT 0.4.0
```
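The quantization block in this README maps field-for-field onto `transformers.BitsAndBytesConfig`. A minimal sketch of the equivalent load call, assuming the transformers and bitsandbytes APIs of the PEFT 0.4.0 era (the base model name comes from adapter_config.json below):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the README block above: 4-bit NF4, double quantization,
# bfloat16 compute, int8 paths disabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-34b-Python-hf",
    quantization_config=bnb_config,
    device_map="auto",
)
```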
checkpoint-5600/adapter_config.json ADDED

```json
{
    "auto_mapping": null,
    "base_model_name_or_path": "codellama/CodeLlama-34b-Python-hf",
    "bias": "none",
    "fan_in_fan_out": false,
    "inference_mode": true,
    "init_lora_weights": true,
    "layers_pattern": null,
    "layers_to_transform": null,
    "lora_alpha": 16.0,
    "lora_dropout": 0.1,
    "modules_to_save": null,
    "peft_type": "LORA",
    "r": 64,
    "revision": null,
    "target_modules": [
        "o_proj",
        "k_proj",
        "down_proj",
        "gate_proj",
        "up_proj",
        "v_proj",
        "q_proj"
    ],
    "task_type": "CAUSAL_LM"
}
```
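A checkpoint directory holding adapter_config.json plus adapter_model.bin, as checkpoint-5600/ does after this commit, is exactly what PEFT expects when re-attaching an adapter. A sketch, assuming PEFT 0.4.0; the quantized base load from the previous sketch would slot in for the plain load here:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-34b-Python-hf", device_map="auto"
)
# Reads adapter_config.json and adapter_model.bin from the directory.
model = PeftModel.from_pretrained(base, "checkpoint-5600")
model.eval()
```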
checkpoint-5600/adapter_model.bin ADDED

```
version https://git-lfs.github.com/spec/v1
oid sha256:2a81912e202e5ef1e0abe63acfe2600dcb02f3768a37b847ef40a41ebbb64f69
size 871609293
```
checkpoint-5600/added_tokens.json ADDED

```json
{
    "[PAD]": 32000
}
```
checkpoint-5600/optimizer.pt ADDED

```
version https://git-lfs.github.com/spec/v1
oid sha256:9f1ce7f0dd170bfeebb821db5f0cfcca98b3957b20a9caeefcb11d959a230f9e
size 873872799
```
checkpoint-5600/rng_state.pth ADDED

```
version https://git-lfs.github.com/spec/v1
oid sha256:1eacbdddf8408ff496013b66ade44228149b42f2f803cd158b398d7288028823
size 14511
```
checkpoint-5600/scheduler.pt ADDED

```
version https://git-lfs.github.com/spec/v1
oid sha256:81248501833af563175f43c1d681185643b8411cee1fb1e631b8687c465eb2e3
size 627
```
checkpoint-5600/special_tokens_map.json ADDED

```json
{
    "bos_token": "<s>",
    "eos_token": "</s>",
    "pad_token": "[PAD]",
    "unk_token": "<unk>"
}
```
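added_tokens.json and this map record a `[PAD]` token at id 32000, one past the 0-31999 base Llama vocabulary. A sketch of the usual recipe that produces exactly these files (standard transformers calls; resizing is needed because the base model has no embedding row for the new id):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-34b-Python-hf")
model = AutoModelForCausalLM.from_pretrained("codellama/CodeLlama-34b-Python-hf")

# Llama ships without a pad token; adding one appends it as id 32000.
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
model.resize_token_embeddings(len(tokenizer))  # grow embeddings to 32001 rows

# Writes added_tokens.json, special_tokens_map.json, tokenizer_config.json, ...
tokenizer.save_pretrained("checkpoint-5600")
```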
checkpoint-5600/tokenizer.model ADDED

```
version https://git-lfs.github.com/spec/v1
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
size 499723
```
checkpoint-5600/tokenizer_config.json ADDED

```json
{
    "add_bos_token": true,
    "add_eos_token": false,
    "bos_token": {
        "__type": "AddedToken",
        "content": "<s>",
        "lstrip": false,
        "normalized": true,
        "rstrip": false,
        "single_word": false
    },
    "clean_up_tokenization_spaces": false,
    "eos_token": {
        "__type": "AddedToken",
        "content": "</s>",
        "lstrip": false,
        "normalized": true,
        "rstrip": false,
        "single_word": false
    },
    "legacy": null,
    "model_max_length": 1000000000000000019884624838656,
    "pad_token": null,
    "padding_side": "right",
    "sp_model_kwargs": {},
    "tokenizer_class": "LlamaTokenizer",
    "unk_token": {
        "__type": "AddedToken",
        "content": "<unk>",
        "lstrip": false,
        "normalized": true,
        "rstrip": false,
        "single_word": false
    }
}
```
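The enormous `model_max_length` above is not corruption: it appears to be transformers' VERY_LARGE_INTEGER sentinel, `int(1e30)` with float rounding baked in, meaning no maximum length was recorded for this tokenizer. A one-liner reproduces the exact digits:

```python
print(int(1e30))  # 1000000000000000019884624838656, matching model_max_length above
```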
checkpoint-5600/trainer_state.json ADDED

The diff for this file is too large to render. See raw diff.
checkpoint-5600/training_args.bin ADDED

```
version https://git-lfs.github.com/spec/v1
oid sha256:fe7d931ebfbcece1009124b9eae98d1a465edd703240c0655ee9bb17db395973
size 6011
```
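training_args.bin is a pickled `transformers.TrainingArguments`, so it can be inspected directly. A sketch (hypothetical inspection snippet; `torch.load` unpickles arbitrary objects, so only use it on checkpoints you trust):

```python
import torch

# Requires transformers to be importable, since the pickle references its classes.
args = torch.load("checkpoint-5600/training_args.bin")
print(args.output_dir, args.max_steps, args.learning_rate)
```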
eval_results.json CHANGED

```diff
@@ -1,7 +1,7 @@
 {
     "epoch": 0.04,
     "eval_loss": 6.335043907165527,
-    "eval_runtime": 21.…,
-    "eval_samples_per_second": 2.…,
-    "eval_steps_per_second": 1.…
+    "eval_runtime": 21.6378,
+    "eval_samples_per_second": 2.311,
+    "eval_steps_per_second": 1.155
 }
```
metrics.json CHANGED

```diff
@@ -1 +1 @@
-{"run_name": "codellama34b_unnatural", "train_runtime": …
+{"run_name": "codellama34b_unnatural", "train_runtime": 1748.3273, "train_samples_per_second": 17.159, "train_steps_per_second": 17.159, "train_loss": 0.6445872698006807, "epoch": 0.04, "eval_loss": 6.335043907165527, "eval_runtime": 21.6378, "eval_samples_per_second": 2.311, "eval_steps_per_second": 1.155}
```
train_results.json CHANGED

```diff
@@ -1,7 +1,7 @@
 {
     "epoch": 0.04,
-    "train_loss": …,
-    "train_runtime": …,
-    "train_samples_per_second": …,
-    "train_steps_per_second": …
+    "train_loss": 0.6445872698006807,
+    "train_runtime": 1748.3273,
+    "train_samples_per_second": 17.159,
+    "train_steps_per_second": 17.159
 }
```
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": 6.335043907165527,
|
3 |
"best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_unnatural-instructions_standardized/checkpoint-4200",
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -29265,11 +29265,3674 @@
|
|
29265 |
"train_runtime": 2886.6567,
|
29266 |
"train_samples_per_second": 10.393,
|
29267 |
"train_steps_per_second": 10.393
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29268 |
}
|
29269 |
],
|
29270 |
"max_steps": 30000,
|
29271 |
"num_train_epochs": 1,
|
29272 |
-
"total_flos":
|
29273 |
"trial_name": null,
|
29274 |
"trial_params": null
|
29275 |
}
|
|
|
1 |
{
|
2 |
"best_metric": 6.335043907165527,
|
3 |
"best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_unnatural-instructions_standardized/checkpoint-4200",
|
4 |
+
"epoch": 0.04124971354365595,
|
5 |
+
"global_step": 5400,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
29265 |
"train_runtime": 2886.6567,
|
29266 |
"train_samples_per_second": 10.393,
|
29267 |
"train_steps_per_second": 10.393
|
29268 |
+
},
|
29269 |
+
{
|
29270 |
+
"epoch": 0.04,
|
29271 |
+
"learning_rate": 0.0004,
|
29272 |
+
"loss": 8.196,
|
29273 |
+
"step": 4801
|
29274 |
+
},
|
29275 |
+
{
|
29276 |
+
"epoch": 0.04,
|
29277 |
+
"learning_rate": 0.0004,
|
29278 |
+
"loss": 7.4766,
|
29279 |
+
"step": 4802
|
29280 |
+
},
|
29281 |
+
{
|
29282 |
+
"epoch": 0.04,
|
29283 |
+
"learning_rate": 0.0004,
|
29284 |
+
"loss": 7.5177,
|
29285 |
+
"step": 4803
|
29286 |
+
},
|
29287 |
+
{
|
29288 |
+
"epoch": 0.04,
|
29289 |
+
"learning_rate": 0.0004,
|
29290 |
+
"loss": 7.6057,
|
29291 |
+
"step": 4804
|
29292 |
+
},
|
29293 |
+
{
|
29294 |
+
"epoch": 0.04,
|
29295 |
+
"learning_rate": 0.0004,
|
29296 |
+
"loss": 7.9972,
|
29297 |
+
"step": 4805
|
29298 |
+
},
|
29299 |
+
{
|
29300 |
+
"epoch": 0.04,
|
29301 |
+
"learning_rate": 0.0004,
|
29302 |
+
"loss": 8.3521,
|
29303 |
+
"step": 4806
|
29304 |
+
},
|
29305 |
+
{
|
29306 |
+
"epoch": 0.04,
|
29307 |
+
"learning_rate": 0.0004,
|
29308 |
+
"loss": 7.9037,
|
29309 |
+
"step": 4807
|
29310 |
+
},
|
29311 |
+
{
|
29312 |
+
"epoch": 0.04,
|
29313 |
+
"learning_rate": 0.0004,
|
29314 |
+
"loss": 8.967,
|
29315 |
+
"step": 4808
|
29316 |
+
},
|
29317 |
+
{
|
29318 |
+
"epoch": 0.04,
|
29319 |
+
"learning_rate": 0.0004,
|
29320 |
+
"loss": 6.2405,
|
29321 |
+
"step": 4809
|
29322 |
+
},
|
29323 |
+
{
|
29324 |
+
"epoch": 0.04,
|
29325 |
+
"learning_rate": 0.0004,
|
29326 |
+
"loss": 8.5253,
|
29327 |
+
"step": 4810
|
29328 |
+
},
|
29329 |
+
{
|
29330 |
+
"epoch": 0.04,
|
29331 |
+
"learning_rate": 0.0004,
|
29332 |
+
"loss": 4.2458,
|
29333 |
+
"step": 4811
|
29334 |
+
},
|
29335 |
+
{
|
29336 |
+
"epoch": 0.04,
|
29337 |
+
"learning_rate": 0.0004,
|
29338 |
+
"loss": 7.179,
|
29339 |
+
"step": 4812
|
29340 |
+
},
|
29341 |
+
{
|
29342 |
+
"epoch": 0.04,
|
29343 |
+
"learning_rate": 0.0004,
|
29344 |
+
"loss": 7.6969,
|
29345 |
+
"step": 4813
|
29346 |
+
},
|
29347 |
+
{
|
29348 |
+
"epoch": 0.04,
|
29349 |
+
"learning_rate": 0.0004,
|
29350 |
+
"loss": 9.2289,
|
29351 |
+
"step": 4814
|
29352 |
+
},
|
29353 |
+
{
|
29354 |
+
"epoch": 0.04,
|
29355 |
+
"learning_rate": 0.0004,
|
29356 |
+
"loss": 7.0946,
|
29357 |
+
"step": 4815
|
29358 |
+
},
|
29359 |
+
{
|
29360 |
+
"epoch": 0.04,
|
29361 |
+
"learning_rate": 0.0004,
|
29362 |
+
"loss": 7.9045,
|
29363 |
+
"step": 4816
|
29364 |
+
},
|
29365 |
+
{
|
29366 |
+
"epoch": 0.04,
|
29367 |
+
"learning_rate": 0.0004,
|
29368 |
+
"loss": 9.6952,
|
29369 |
+
"step": 4817
|
29370 |
+
},
|
29371 |
+
{
|
29372 |
+
"epoch": 0.04,
|
29373 |
+
"learning_rate": 0.0004,
|
29374 |
+
"loss": 6.7265,
|
29375 |
+
"step": 4818
|
29376 |
+
},
|
29377 |
+
{
|
29378 |
+
"epoch": 0.04,
|
29379 |
+
"learning_rate": 0.0004,
|
29380 |
+
"loss": 2.9574,
|
29381 |
+
"step": 4819
|
29382 |
+
},
|
29383 |
+
{
|
29384 |
+
"epoch": 0.04,
|
29385 |
+
"learning_rate": 0.0004,
|
29386 |
+
"loss": 7.3774,
|
29387 |
+
"step": 4820
|
29388 |
+
},
|
29389 |
+
{
|
29390 |
+
"epoch": 0.04,
|
29391 |
+
"learning_rate": 0.0004,
|
29392 |
+
"loss": 6.7837,
|
29393 |
+
"step": 4821
|
29394 |
+
},
|
29395 |
+
{
|
29396 |
+
"epoch": 0.04,
|
29397 |
+
"learning_rate": 0.0004,
|
29398 |
+
"loss": 3.3796,
|
29399 |
+
"step": 4822
|
29400 |
+
},
|
29401 |
+
{
|
29402 |
+
"epoch": 0.04,
|
29403 |
+
"learning_rate": 0.0004,
|
29404 |
+
"loss": 6.4443,
|
29405 |
+
"step": 4823
|
29406 |
+
},
|
29407 |
+
{
|
29408 |
+
"epoch": 0.04,
|
29409 |
+
"learning_rate": 0.0004,
|
29410 |
+
"loss": 8.7734,
|
29411 |
+
"step": 4824
|
29412 |
+
},
|
29413 |
+
{
|
29414 |
+
"epoch": 0.04,
|
29415 |
+
"learning_rate": 0.0004,
|
29416 |
+
"loss": 9.5535,
|
29417 |
+
"step": 4825
|
29418 |
+
},
|
29419 |
+
{
|
29420 |
+
"epoch": 0.04,
|
29421 |
+
"learning_rate": 0.0004,
|
29422 |
+
"loss": 6.1014,
|
29423 |
+
"step": 4826
|
29424 |
+
},
|
29425 |
+
{
|
29426 |
+
"epoch": 0.04,
|
29427 |
+
"learning_rate": 0.0004,
|
29428 |
+
"loss": 3.5574,
|
29429 |
+
"step": 4827
|
29430 |
+
},
|
29431 |
+
{
|
29432 |
+
"epoch": 0.04,
|
29433 |
+
"learning_rate": 0.0004,
|
29434 |
+
"loss": 4.5114,
|
29435 |
+
"step": 4828
|
29436 |
+
},
|
29437 |
+
{
|
29438 |
+
"epoch": 0.04,
|
29439 |
+
"learning_rate": 0.0004,
|
29440 |
+
"loss": 3.431,
|
29441 |
+
"step": 4829
|
29442 |
+
},
|
29443 |
+
{
|
29444 |
+
"epoch": 0.04,
|
29445 |
+
"learning_rate": 0.0004,
|
29446 |
+
"loss": 7.8042,
|
29447 |
+
"step": 4830
|
29448 |
+
},
|
29449 |
+
{
|
29450 |
+
"epoch": 0.04,
|
29451 |
+
"learning_rate": 0.0004,
|
29452 |
+
"loss": 2.4997,
|
29453 |
+
"step": 4831
|
29454 |
+
},
|
29455 |
+
{
|
29456 |
+
"epoch": 0.04,
|
29457 |
+
"learning_rate": 0.0004,
|
29458 |
+
"loss": 7.9027,
|
29459 |
+
"step": 4832
|
29460 |
+
},
|
29461 |
+
{
|
29462 |
+
"epoch": 0.04,
|
29463 |
+
"learning_rate": 0.0004,
|
29464 |
+
"loss": 4.7126,
|
29465 |
+
"step": 4833
|
29466 |
+
},
|
29467 |
+
{
|
29468 |
+
"epoch": 0.04,
|
29469 |
+
"learning_rate": 0.0004,
|
29470 |
+
"loss": 2.8638,
|
29471 |
+
"step": 4834
|
29472 |
+
},
|
29473 |
+
{
|
29474 |
+
"epoch": 0.04,
|
29475 |
+
"learning_rate": 0.0004,
|
29476 |
+
"loss": 6.4997,
|
29477 |
+
"step": 4835
|
29478 |
+
},
|
29479 |
+
{
|
29480 |
+
"epoch": 0.04,
|
29481 |
+
"learning_rate": 0.0004,
|
29482 |
+
"loss": 4.8501,
|
29483 |
+
"step": 4836
|
29484 |
+
},
|
29485 |
+
{
|
29486 |
+
"epoch": 0.04,
|
29487 |
+
"learning_rate": 0.0004,
|
29488 |
+
"loss": 2.6346,
|
29489 |
+
"step": 4837
|
29490 |
+
},
|
29491 |
+
{
|
29492 |
+
"epoch": 0.04,
|
29493 |
+
"learning_rate": 0.0004,
|
29494 |
+
"loss": 2.8403,
|
29495 |
+
"step": 4838
|
29496 |
+
},
|
29497 |
+
{
|
29498 |
+
"epoch": 0.04,
|
29499 |
+
"learning_rate": 0.0004,
|
29500 |
+
"loss": 6.8362,
|
29501 |
+
"step": 4839
|
29502 |
+
},
|
29503 |
+
{
|
29504 |
+
"epoch": 0.04,
|
29505 |
+
"learning_rate": 0.0004,
|
29506 |
+
"loss": 2.8393,
|
29507 |
+
"step": 4840
|
29508 |
+
},
|
29509 |
+
{
|
29510 |
+
"epoch": 0.04,
|
29511 |
+
"learning_rate": 0.0004,
|
29512 |
+
"loss": 2.6428,
|
29513 |
+
"step": 4841
|
29514 |
+
},
|
29515 |
+
{
|
29516 |
+
"epoch": 0.04,
|
29517 |
+
"learning_rate": 0.0004,
|
29518 |
+
"loss": 5.9946,
|
29519 |
+
"step": 4842
|
29520 |
+
},
|
29521 |
+
{
|
29522 |
+
"epoch": 0.04,
|
29523 |
+
"learning_rate": 0.0004,
|
29524 |
+
"loss": 4.3163,
|
29525 |
+
"step": 4843
|
29526 |
+
},
|
29527 |
+
{
|
29528 |
+
"epoch": 0.04,
|
29529 |
+
"learning_rate": 0.0004,
|
29530 |
+
"loss": 6.9659,
|
29531 |
+
"step": 4844
|
29532 |
+
},
|
29533 |
+
{
|
29534 |
+
"epoch": 0.04,
|
29535 |
+
"learning_rate": 0.0004,
|
29536 |
+
"loss": 6.5787,
|
29537 |
+
"step": 4845
|
29538 |
+
},
|
29539 |
+
{
|
29540 |
+
"epoch": 0.04,
|
29541 |
+
"learning_rate": 0.0004,
|
29542 |
+
"loss": 9.0435,
|
29543 |
+
"step": 4846
|
29544 |
+
},
|
29545 |
+
{
|
29546 |
+
"epoch": 0.04,
|
29547 |
+
"learning_rate": 0.0004,
|
29548 |
+
"loss": 5.6627,
|
29549 |
+
"step": 4847
|
29550 |
+
},
|
29551 |
+
{
|
29552 |
+
"epoch": 0.04,
|
29553 |
+
"learning_rate": 0.0004,
|
29554 |
+
"loss": 7.0435,
|
29555 |
+
"step": 4848
|
29556 |
+
},
|
29557 |
+
{
|
29558 |
+
"epoch": 0.04,
|
29559 |
+
"learning_rate": 0.0004,
|
29560 |
+
"loss": 7.584,
|
29561 |
+
"step": 4849
|
29562 |
+
},
|
29563 |
+
{
|
29564 |
+
"epoch": 0.04,
|
29565 |
+
"learning_rate": 0.0004,
|
29566 |
+
"loss": 5.5761,
|
29567 |
+
"step": 4850
|
29568 |
+
},
|
29569 |
+
{
|
29570 |
+
"epoch": 0.04,
|
29571 |
+
"learning_rate": 0.0004,
|
29572 |
+
"loss": 8.0644,
|
29573 |
+
"step": 4851
|
29574 |
+
},
|
29575 |
+
{
|
29576 |
+
"epoch": 0.04,
|
29577 |
+
"learning_rate": 0.0004,
|
29578 |
+
"loss": 6.7897,
|
29579 |
+
"step": 4852
|
29580 |
+
},
|
29581 |
+
{
|
29582 |
+
"epoch": 0.04,
|
29583 |
+
"learning_rate": 0.0004,
|
29584 |
+
"loss": 7.7933,
|
29585 |
+
"step": 4853
|
29586 |
+
},
|
29587 |
+
{
|
29588 |
+
"epoch": 0.04,
|
29589 |
+
"learning_rate": 0.0004,
|
29590 |
+
"loss": 8.0918,
|
29591 |
+
"step": 4854
|
29592 |
+
},
|
29593 |
+
{
|
29594 |
+
"epoch": 0.04,
|
29595 |
+
"learning_rate": 0.0004,
|
29596 |
+
"loss": 8.1191,
|
29597 |
+
"step": 4855
|
29598 |
+
},
|
29599 |
+
{
|
29600 |
+
"epoch": 0.04,
|
29601 |
+
"learning_rate": 0.0004,
|
29602 |
+
"loss": 5.6498,
|
29603 |
+
"step": 4856
|
29604 |
+
},
|
29605 |
+
{
|
29606 |
+
"epoch": 0.04,
|
29607 |
+
"learning_rate": 0.0004,
|
29608 |
+
"loss": 3.1834,
|
29609 |
+
"step": 4857
|
29610 |
+
},
|
29611 |
+
{
|
29612 |
+
"epoch": 0.04,
|
29613 |
+
"learning_rate": 0.0004,
|
29614 |
+
"loss": 7.5713,
|
29615 |
+
"step": 4858
|
29616 |
+
},
|
29617 |
+
{
|
29618 |
+
"epoch": 0.04,
|
29619 |
+
"learning_rate": 0.0004,
|
29620 |
+
"loss": 6.643,
|
29621 |
+
"step": 4859
|
29622 |
+
},
|
29623 |
+
{
|
29624 |
+
"epoch": 0.04,
|
29625 |
+
"learning_rate": 0.0004,
|
29626 |
+
"loss": 6.0051,
|
29627 |
+
"step": 4860
|
29628 |
+
},
|
29629 |
+
{
|
29630 |
+
"epoch": 0.04,
|
29631 |
+
"learning_rate": 0.0004,
|
29632 |
+
"loss": 5.3192,
|
29633 |
+
"step": 4861
|
29634 |
+
},
|
29635 |
+
{
|
29636 |
+
"epoch": 0.04,
|
29637 |
+
"learning_rate": 0.0004,
|
29638 |
+
"loss": 6.6787,
|
29639 |
+
"step": 4862
|
29640 |
+
},
|
29641 |
+
{
|
29642 |
+
"epoch": 0.04,
|
29643 |
+
"learning_rate": 0.0004,
|
29644 |
+
"loss": 6.1336,
|
29645 |
+
"step": 4863
|
29646 |
+
},
|
29647 |
+
{
|
29648 |
+
"epoch": 0.04,
|
29649 |
+
"learning_rate": 0.0004,
|
29650 |
+
"loss": 4.1196,
|
29651 |
+
"step": 4864
|
29652 |
+
},
|
29653 |
+
{
|
29654 |
+
"epoch": 0.04,
|
29655 |
+
"learning_rate": 0.0004,
|
29656 |
+
"loss": 7.7662,
|
29657 |
+
"step": 4865
|
29658 |
+
},
|
29659 |
+
{
|
29660 |
+
"epoch": 0.04,
|
29661 |
+
"learning_rate": 0.0004,
|
29662 |
+
"loss": 6.5099,
|
29663 |
+
"step": 4866
|
29664 |
+
},
|
29665 |
+
{
|
29666 |
+
"epoch": 0.04,
|
29667 |
+
"learning_rate": 0.0004,
|
29668 |
+
"loss": 6.4698,
|
29669 |
+
"step": 4867
|
29670 |
+
},
|
29671 |
+
{
|
29672 |
+
"epoch": 0.04,
|
29673 |
+
"learning_rate": 0.0004,
|
29674 |
+
"loss": 2.9245,
|
29675 |
+
"step": 4868
|
29676 |
+
},
|
29677 |
+
{
|
29678 |
+
"epoch": 0.04,
|
29679 |
+
"learning_rate": 0.0004,
|
29680 |
+
"loss": 3.0627,
|
29681 |
+
"step": 4869
|
29682 |
+
},
|
29683 |
+
{
|
29684 |
+
"epoch": 0.04,
|
29685 |
+
"learning_rate": 0.0004,
|
29686 |
+
"loss": 4.4951,
|
29687 |
+
"step": 4870
|
29688 |
+
},
|
29689 |
+
{
|
29690 |
+
"epoch": 0.04,
|
29691 |
+
"learning_rate": 0.0004,
|
29692 |
+
"loss": 9.7325,
|
29693 |
+
"step": 4871
|
29694 |
+
},
|
29695 |
+
{
|
29696 |
+
"epoch": 0.04,
|
29697 |
+
"learning_rate": 0.0004,
|
29698 |
+
"loss": 5.3552,
|
29699 |
+
"step": 4872
|
29700 |
+
},
|
29701 |
+
{
|
29702 |
+
"epoch": 0.04,
|
29703 |
+
"learning_rate": 0.0004,
|
29704 |
+
"loss": 4.0359,
|
29705 |
+
"step": 4873
|
29706 |
+
},
|
29707 |
+
{
|
29708 |
+
"epoch": 0.04,
|
29709 |
+
"learning_rate": 0.0004,
|
29710 |
+
"loss": 5.259,
|
29711 |
+
"step": 4874
|
29712 |
+
},
|
29713 |
+
{
|
29714 |
+
"epoch": 0.04,
|
29715 |
+
"learning_rate": 0.0004,
|
29716 |
+
"loss": 7.39,
|
29717 |
+
"step": 4875
|
29718 |
+
},
|
29719 |
+
{
|
29720 |
+
"epoch": 0.04,
|
29721 |
+
"learning_rate": 0.0004,
|
29722 |
+
"loss": 5.2283,
|
29723 |
+
"step": 4876
|
29724 |
+
},
|
29725 |
+
{
|
29726 |
+
"epoch": 0.04,
|
29727 |
+
"learning_rate": 0.0004,
|
29728 |
+
"loss": 7.2338,
|
29729 |
+
"step": 4877
|
29730 |
+
},
|
29731 |
+
{
|
29732 |
+
"epoch": 0.04,
|
29733 |
+
"learning_rate": 0.0004,
|
29734 |
+
"loss": 4.5157,
|
29735 |
+
"step": 4878
|
29736 |
+
},
|
29737 |
+
{
|
29738 |
+
"epoch": 0.04,
|
29739 |
+
"learning_rate": 0.0004,
|
29740 |
+
"loss": 6.7822,
|
29741 |
+
"step": 4879
|
29742 |
+
},
|
29743 |
+
{
|
29744 |
+
"epoch": 0.04,
|
29745 |
+
"learning_rate": 0.0004,
|
29746 |
+
"loss": 5.4882,
|
29747 |
+
"step": 4880
|
29748 |
+
},
|
29749 |
+
{
|
29750 |
+
"epoch": 0.04,
|
29751 |
+
"learning_rate": 0.0004,
|
29752 |
+
"loss": 2.9394,
|
29753 |
+
"step": 4881
|
29754 |
+
},
|
29755 |
+
{
|
29756 |
+
"epoch": 0.04,
|
29757 |
+
"learning_rate": 0.0004,
|
29758 |
+
"loss": 9.7844,
|
29759 |
+
"step": 4882
|
29760 |
+
},
|
29761 |
+
{
|
29762 |
+
"epoch": 0.04,
|
29763 |
+
"learning_rate": 0.0004,
|
29764 |
+
"loss": 4.0808,
|
29765 |
+
"step": 4883
|
29766 |
+
},
|
29767 |
+
{
|
29768 |
+
"epoch": 0.04,
|
29769 |
+
"learning_rate": 0.0004,
|
29770 |
+
"loss": 5.4349,
|
29771 |
+
"step": 4884
|
29772 |
+
},
|
29773 |
+
{
|
29774 |
+
"epoch": 0.04,
|
29775 |
+
"learning_rate": 0.0004,
|
29776 |
+
"loss": 4.7747,
|
29777 |
+
"step": 4885
|
29778 |
+
},
|
29779 |
+
{
|
29780 |
+
"epoch": 0.04,
|
29781 |
+
"learning_rate": 0.0004,
|
29782 |
+
"loss": 3.2778,
|
29783 |
+
"step": 4886
|
29784 |
+
},
|
29785 |
+
{
|
29786 |
+
"epoch": 0.04,
|
29787 |
+
"learning_rate": 0.0004,
|
29788 |
+
"loss": 7.9457,
|
29789 |
+
"step": 4887
|
29790 |
+
},
|
29791 |
+
{
|
29792 |
+
"epoch": 0.04,
|
29793 |
+
"learning_rate": 0.0004,
|
29794 |
+
"loss": 3.196,
|
29795 |
+
"step": 4888
|
29796 |
+
},
|
29797 |
+
{
|
29798 |
+
"epoch": 0.04,
|
29799 |
+
"learning_rate": 0.0004,
|
29800 |
+
"loss": 4.2576,
|
29801 |
+
"step": 4889
|
29802 |
+
},
|
29803 |
+
{
|
29804 |
+
"epoch": 0.04,
|
29805 |
+
"learning_rate": 0.0004,
|
29806 |
+
"loss": 6.3978,
|
29807 |
+
"step": 4890
|
29808 |
+
},
|
29809 |
+
{
|
29810 |
+
"epoch": 0.04,
|
29811 |
+
"learning_rate": 0.0004,
|
29812 |
+
"loss": 7.4963,
|
29813 |
+
"step": 4891
|
29814 |
+
},
|
29815 |
+
{
|
29816 |
+
"epoch": 0.04,
|
29817 |
+
"learning_rate": 0.0004,
|
29818 |
+
"loss": 8.8915,
|
29819 |
+
"step": 4892
|
29820 |
+
},
|
29821 |
+
{
|
29822 |
+
"epoch": 0.04,
|
29823 |
+
"learning_rate": 0.0004,
|
29824 |
+
"loss": 2.8344,
|
29825 |
+
"step": 4893
|
29826 |
+
},
|
29827 |
+
{
|
29828 |
+
"epoch": 0.04,
|
29829 |
+
"learning_rate": 0.0004,
|
29830 |
+
"loss": 6.5248,
|
29831 |
+
"step": 4894
|
29832 |
+
},
|
29833 |
+
{
|
29834 |
+
"epoch": 0.04,
|
29835 |
+
"learning_rate": 0.0004,
|
29836 |
+
"loss": 2.9729,
|
29837 |
+
"step": 4895
|
29838 |
+
},
|
29839 |
+
{
|
29840 |
+
"epoch": 0.04,
|
29841 |
+
"learning_rate": 0.0004,
|
29842 |
+
"loss": 2.7504,
|
29843 |
+
"step": 4896
|
29844 |
+
},
|
29845 |
+
{
|
29846 |
+
"epoch": 0.04,
|
29847 |
+
"learning_rate": 0.0004,
|
29848 |
+
"loss": 4.4288,
|
29849 |
+
"step": 4897
|
29850 |
+
},
|
29851 |
+
{
|
29852 |
+
"epoch": 0.04,
|
29853 |
+
"learning_rate": 0.0004,
|
29854 |
+
"loss": 5.173,
|
29855 |
+
"step": 4898
|
29856 |
+
},
|
29857 |
+
{
|
29858 |
+
"epoch": 0.04,
|
29859 |
+
"learning_rate": 0.0004,
|
29860 |
+
"loss": 2.6288,
|
29861 |
+
"step": 4899
|
29862 |
+
},
|
29863 |
+
{
|
29864 |
+
"epoch": 0.04,
|
29865 |
+
"learning_rate": 0.0004,
|
29866 |
+
"loss": 7.3934,
|
29867 |
+
"step": 4900
|
29868 |
+
},
|
29869 |
+
{
|
29870 |
+
"epoch": 0.04,
|
29871 |
+
"learning_rate": 0.0004,
|
29872 |
+
"loss": 2.8056,
|
29873 |
+
"step": 4901
|
29874 |
+
},
|
29875 |
+
{
|
29876 |
+
"epoch": 0.04,
|
29877 |
+
"learning_rate": 0.0004,
|
29878 |
+
"loss": 6.7523,
|
29879 |
+
"step": 4902
|
29880 |
+
},
|
29881 |
+
{
|
29882 |
+
"epoch": 0.04,
|
29883 |
+
"learning_rate": 0.0004,
|
29884 |
+
"loss": 7.6066,
|
29885 |
+
"step": 4903
|
29886 |
+
},
|
29887 |
+
{
|
29888 |
+
"epoch": 0.04,
|
29889 |
+
"learning_rate": 0.0004,
|
29890 |
+
"loss": 8.6161,
|
29891 |
+
"step": 4904
|
29892 |
+
},
|
29893 |
+
{
|
29894 |
+
"epoch": 0.04,
|
29895 |
+
"learning_rate": 0.0004,
|
29896 |
+
"loss": 7.8099,
|
29897 |
+
"step": 4905
|
29898 |
+
},
|
29899 |
+
{
|
29900 |
+
"epoch": 0.04,
|
29901 |
+
"learning_rate": 0.0004,
|
29902 |
+
"loss": 5.2048,
|
29903 |
+
"step": 4906
|
29904 |
+
},
|
29905 |
+
{
|
29906 |
+
"epoch": 0.04,
|
29907 |
+
"learning_rate": 0.0004,
|
29908 |
+
"loss": 2.6112,
|
29909 |
+
"step": 4907
|
29910 |
+
},
|
29911 |
+
{
|
29912 |
+
"epoch": 0.04,
|
29913 |
+
"learning_rate": 0.0004,
|
29914 |
+
"loss": 8.5394,
|
29915 |
+
"step": 4908
|
29916 |
+
},
|
29917 |
+
{
|
29918 |
+
"epoch": 0.04,
|
29919 |
+
"learning_rate": 0.0004,
|
29920 |
+
"loss": 7.9661,
|
29921 |
+
"step": 4909
|
29922 |
+
},
|
29923 |
+
{
|
29924 |
+
"epoch": 0.04,
|
29925 |
+
"learning_rate": 0.0004,
|
29926 |
+
"loss": 7.1945,
|
29927 |
+
"step": 4910
|
29928 |
+
},
|
29929 |
+
{
|
29930 |
+
"epoch": 0.04,
|
29931 |
+
"learning_rate": 0.0004,
|
29932 |
+
"loss": 7.1823,
|
29933 |
+
"step": 4911
|
29934 |
+
},
|
29935 |
+
{
|
29936 |
+
"epoch": 0.04,
|
29937 |
+
"learning_rate": 0.0004,
|
29938 |
+
"loss": 6.5774,
|
29939 |
+
"step": 4912
|
29940 |
+
},
|
29941 |
+
{
|
29942 |
+
"epoch": 0.04,
|
29943 |
+
"learning_rate": 0.0004,
|
29944 |
+
"loss": 7.3444,
|
29945 |
+
"step": 4913
|
29946 |
+
},
|
29947 |
+
{
|
29948 |
+
"epoch": 0.04,
|
29949 |
+
"learning_rate": 0.0004,
|
29950 |
+
"loss": 7.8732,
|
29951 |
+
"step": 4914
|
29952 |
+
},
|
29953 |
+
{
|
29954 |
+
"epoch": 0.04,
|
29955 |
+
"learning_rate": 0.0004,
|
29956 |
+
"loss": 3.5685,
|
29957 |
+
"step": 4915
|
29958 |
+
},
|
29959 |
+
{
|
29960 |
+
"epoch": 0.04,
|
29961 |
+
"learning_rate": 0.0004,
|
29962 |
+
"loss": 4.6944,
|
29963 |
+
"step": 4916
|
29964 |
+
},
|
29965 |
+
{
|
29966 |
+
"epoch": 0.04,
|
29967 |
+
"learning_rate": 0.0004,
|
29968 |
+
"loss": 8.9668,
|
29969 |
+
"step": 4917
|
29970 |
+
},
|
29971 |
+
{
|
29972 |
+
"epoch": 0.04,
|
29973 |
+
"learning_rate": 0.0004,
|
29974 |
+
"loss": 2.9854,
|
29975 |
+
"step": 4918
|
29976 |
+
},
|
29977 |
+
{
|
29978 |
+
"epoch": 0.04,
|
29979 |
+
"learning_rate": 0.0004,
|
29980 |
+
"loss": 9.0986,
|
29981 |
+
"step": 4919
|
29982 |
+
},
|
29983 |
+
{
|
29984 |
+
"epoch": 0.04,
|
29985 |
+
"learning_rate": 0.0004,
|
29986 |
+
"loss": 6.8546,
|
29987 |
+
"step": 4920
|
29988 |
+
},
|
29989 |
+
{
|
29990 |
+
"epoch": 0.04,
|
29991 |
+
"learning_rate": 0.0004,
|
29992 |
+
"loss": 2.658,
|
29993 |
+
"step": 4921
|
29994 |
+
},
|
29995 |
+
{
|
29996 |
+
"epoch": 0.04,
|
29997 |
+
"learning_rate": 0.0004,
|
29998 |
+
"loss": 2.8595,
|
29999 |
+
"step": 4922
|
30000 |
+
},
|
30001 |
+
{
|
30002 |
+
"epoch": 0.04,
|
30003 |
+
"learning_rate": 0.0004,
|
30004 |
+
"loss": 6.3526,
|
30005 |
+
"step": 4923
|
30006 |
+
},
|
30007 |
+
{
|
30008 |
+
"epoch": 0.04,
|
30009 |
+
"learning_rate": 0.0004,
|
30010 |
+
"loss": 6.6612,
|
30011 |
+
"step": 4924
|
30012 |
+
},
|
30013 |
+
{
|
30014 |
+
"epoch": 0.04,
|
30015 |
+
"learning_rate": 0.0004,
|
30016 |
+
"loss": 6.4798,
|
30017 |
+
"step": 4925
|
30018 |
+
},
|
30019 |
+
{
|
30020 |
+
"epoch": 0.04,
|
30021 |
+
"learning_rate": 0.0004,
|
30022 |
+
"loss": 6.0779,
|
30023 |
+
"step": 4926
|
30024 |
+
},
|
30025 |
+
{
|
30026 |
+
"epoch": 0.04,
|
30027 |
+
"learning_rate": 0.0004,
|
30028 |
+
"loss": 2.8211,
|
30029 |
+
"step": 4927
|
30030 |
+
},
|
30031 |
+
{
|
30032 |
+
"epoch": 0.04,
|
30033 |
+
"learning_rate": 0.0004,
|
30034 |
+
"loss": 7.9007,
|
30035 |
+
"step": 4928
|
30036 |
+
},
|
30037 |
+
{
|
30038 |
+
"epoch": 0.04,
|
30039 |
+
"learning_rate": 0.0004,
|
30040 |
+
"loss": 2.5789,
|
30041 |
+
"step": 4929
|
30042 |
+
},
|
30043 |
+
{
|
30044 |
+
"epoch": 0.04,
|
30045 |
+
"learning_rate": 0.0004,
|
30046 |
+
"loss": 8.0357,
|
30047 |
+
"step": 4930
|
30048 |
+
},
|
30049 |
+
{
|
30050 |
+
"epoch": 0.04,
|
30051 |
+
"learning_rate": 0.0004,
|
30052 |
+
"loss": 6.8846,
|
30053 |
+
"step": 4931
|
30054 |
+
},
|
30055 |
+
{
|
30056 |
+
"epoch": 0.04,
|
30057 |
+
"learning_rate": 0.0004,
|
30058 |
+
"loss": 5.7409,
|
30059 |
+
"step": 4932
|
30060 |
+
},
|
30061 |
+
{
|
30062 |
+
"epoch": 0.04,
|
30063 |
+
"learning_rate": 0.0004,
|
30064 |
+
"loss": 8.4081,
|
30065 |
+
"step": 4933
|
30066 |
+
},
|
30067 |
+
{
|
30068 |
+
"epoch": 0.04,
|
30069 |
+
"learning_rate": 0.0004,
|
30070 |
+
"loss": 7.3187,
|
30071 |
+
"step": 4934
|
30072 |
+
},
|
30073 |
+
{
|
30074 |
+
"epoch": 0.04,
|
30075 |
+
"learning_rate": 0.0004,
|
30076 |
+
"loss": 8.1926,
|
30077 |
+
"step": 4935
|
30078 |
+
},
|
30079 |
+
{
|
30080 |
+
"epoch": 0.04,
|
30081 |
+
"learning_rate": 0.0004,
|
30082 |
+
"loss": 8.2912,
|
30083 |
+
"step": 4936
|
30084 |
+
},
|
30085 |
+
{
|
30086 |
+
"epoch": 0.04,
|
30087 |
+
"learning_rate": 0.0004,
|
30088 |
+
"loss": 6.6701,
|
30089 |
+
"step": 4937
|
30090 |
+
},
|
30091 |
+
{
|
30092 |
+
"epoch": 0.04,
|
30093 |
+
"learning_rate": 0.0004,
|
30094 |
+
"loss": 4.8162,
|
30095 |
+
"step": 4938
|
30096 |
+
},
|
30097 |
+
{
|
30098 |
+
"epoch": 0.04,
|
30099 |
+
"learning_rate": 0.0004,
|
30100 |
+
"loss": 2.7585,
|
30101 |
+
"step": 4939
|
30102 |
+
},
|
30103 |
+
{
|
30104 |
+
"epoch": 0.04,
|
30105 |
+
"learning_rate": 0.0004,
|
30106 |
+
"loss": 6.6232,
|
30107 |
+
"step": 4940
|
30108 |
+
},
|
30109 |
+
{
|
30110 |
+
"epoch": 0.04,
|
30111 |
+
"learning_rate": 0.0004,
|
30112 |
+
"loss": 7.9613,
|
30113 |
+
"step": 4941
|
30114 |
+
},
|
30115 |
+
{
|
30116 |
+
"epoch": 0.04,
|
30117 |
+
"learning_rate": 0.0004,
|
30118 |
+
"loss": 3.954,
|
30119 |
+
"step": 4942
|
30120 |
+
},
|
30121 |
+
{
|
30122 |
+
"epoch": 0.04,
|
30123 |
+
"learning_rate": 0.0004,
|
30124 |
+
"loss": 2.7287,
|
30125 |
+
"step": 4943
|
30126 |
+
},
|
30127 |
+
{
|
30128 |
+
"epoch": 0.04,
|
30129 |
+
"learning_rate": 0.0004,
|
30130 |
+
"loss": 4.6305,
|
30131 |
+
"step": 4944
|
30132 |
+
},
|
30133 |
+
{
|
30134 |
+
"epoch": 0.04,
|
30135 |
+
"learning_rate": 0.0004,
|
30136 |
+
"loss": 2.6932,
|
30137 |
+
"step": 4945
|
30138 |
+
},
|
30139 |
+
{
|
30140 |
+
"epoch": 0.04,
|
30141 |
+
"learning_rate": 0.0004,
|
30142 |
+
"loss": 2.6798,
|
30143 |
+
"step": 4946
|
30144 |
+
},
|
30145 |
+
{
|
30146 |
+
"epoch": 0.04,
|
30147 |
+
"learning_rate": 0.0004,
|
30148 |
+
"loss": 3.6665,
|
30149 |
+
"step": 4947
|
30150 |
+
},
|
30151 |
+
{
|
30152 |
+
"epoch": 0.04,
|
30153 |
+
"learning_rate": 0.0004,
|
30154 |
+
"loss": 6.1462,
|
30155 |
+
"step": 4948
|
30156 |
+
},
|
30157 |
+
{
|
30158 |
+
"epoch": 0.04,
|
30159 |
+
"learning_rate": 0.0004,
|
30160 |
+
"loss": 4.0676,
|
30161 |
+
"step": 4949
|
30162 |
+
},
|
30163 |
+
{
|
30164 |
+
"epoch": 0.04,
|
30165 |
+
"learning_rate": 0.0004,
|
30166 |
+
"loss": 4.2834,
|
30167 |
+
"step": 4950
|
30168 |
+
},
|
30169 |
+
{
|
30170 |
+
"epoch": 0.04,
|
30171 |
+
"learning_rate": 0.0004,
|
30172 |
+
"loss": 5.8273,
|
30173 |
+
"step": 4951
|
30174 |
+
},
|
30175 |
+
{
|
30176 |
+
"epoch": 0.04,
|
30177 |
+
"learning_rate": 0.0004,
|
30178 |
+
"loss": 7.635,
|
30179 |
+
"step": 4952
|
30180 |
+
},
|
30181 |
+
{
|
30182 |
+
"epoch": 0.04,
|
30183 |
+
"learning_rate": 0.0004,
|
30184 |
+
"loss": 8.9245,
|
30185 |
+
"step": 4953
|
30186 |
+
},
|
30187 |
+
{
|
30188 |
+
"epoch": 0.04,
|
30189 |
+
"learning_rate": 0.0004,
|
30190 |
+
"loss": 8.5401,
|
30191 |
+
"step": 4954
|
30192 |
+
},
|
30193 |
+
{
|
30194 |
+
"epoch": 0.04,
|
30195 |
+
"learning_rate": 0.0004,
|
30196 |
+
"loss": 8.2944,
|
30197 |
+
"step": 4955
|
30198 |
+
},
|
30199 |
+
{
|
30200 |
+
"epoch": 0.04,
|
30201 |
+
"learning_rate": 0.0004,
|
30202 |
+
"loss": 6.6151,
|
30203 |
+
"step": 4956
|
30204 |
+
},
|
30205 |
+
{
|
30206 |
+
"epoch": 0.04,
|
30207 |
+
"learning_rate": 0.0004,
|
30208 |
+
"loss": 4.3668,
|
30209 |
+
"step": 4957
|
30210 |
+
},
|
30211 |
+
{
|
30212 |
+
"epoch": 0.04,
|
30213 |
+
"learning_rate": 0.0004,
|
30214 |
+
"loss": 7.4506,
|
30215 |
+
"step": 4958
|
30216 |
+
},
|
30217 |
+
{
|
30218 |
+
"epoch": 0.04,
|
30219 |
+
"learning_rate": 0.0004,
|
30220 |
+
"loss": 7.8919,
|
30221 |
+
"step": 4959
|
30222 |
+
},
|
30223 |
+
{
|
30224 |
+
"epoch": 0.04,
|
30225 |
+
"learning_rate": 0.0004,
|
30226 |
+
"loss": 8.7462,
|
30227 |
+
"step": 4960
|
30228 |
+
},
|
30229 |
+
{
|
30230 |
+
"epoch": 0.04,
|
30231 |
+
"learning_rate": 0.0004,
|
30232 |
+
"loss": 5.8915,
|
30233 |
+
"step": 4961
|
30234 |
+
},
|
30235 |
+
{
|
30236 |
+
"epoch": 0.04,
|
30237 |
+
"learning_rate": 0.0004,
|
30238 |
+
"loss": 7.4696,
|
30239 |
+
"step": 4962
|
30240 |
+
},
|
30241 |
+
{
|
30242 |
+
"epoch": 0.04,
|
30243 |
+
"learning_rate": 0.0004,
|
30244 |
+
"loss": 7.0112,
|
30245 |
+
"step": 4963
|
30246 |
+
},
|
30247 |
+
{
|
30248 |
+
"epoch": 0.04,
|
30249 |
+
"learning_rate": 0.0004,
|
30250 |
+
"loss": 8.1888,
|
30251 |
+
"step": 4964
|
30252 |
+
},
|
30253 |
+
{
|
30254 |
+
"epoch": 0.04,
|
30255 |
+
"learning_rate": 0.0004,
|
30256 |
+
"loss": 7.1465,
|
30257 |
+
"step": 4965
|
30258 |
+
},
|
30259 |
+
{
|
30260 |
+
"epoch": 0.04,
|
30261 |
+
"learning_rate": 0.0004,
|
30262 |
+
"loss": 3.6028,
|
30263 |
+
"step": 4966
|
30264 |
+
},
|
30265 |
+
{
|
30266 |
+
"epoch": 0.04,
|
30267 |
+
"learning_rate": 0.0004,
|
30268 |
+
"loss": 3.279,
|
30269 |
+
"step": 4967
|
30270 |
+
},
|
30271 |
+
{
|
30272 |
+
"epoch": 0.04,
|
30273 |
+
"learning_rate": 0.0004,
|
30274 |
+
"loss": 6.4619,
|
30275 |
+
"step": 4968
|
30276 |
+
},
|
30277 |
+
{
|
30278 |
+
"epoch": 0.04,
|
30279 |
+
"learning_rate": 0.0004,
|
30280 |
+
"loss": 6.7617,
|
30281 |
+
"step": 4969
|
30282 |
+
},
|
30283 |
+
{
|
30284 |
+
"epoch": 0.04,
|
30285 |
+
"learning_rate": 0.0004,
|
30286 |
+
"loss": 8.0521,
|
30287 |
+
"step": 4970
|
30288 |
+
},
|
30289 |
+
{
|
30290 |
+
"epoch": 0.04,
|
30291 |
+
"learning_rate": 0.0004,
|
30292 |
+
"loss": 3.9583,
|
30293 |
+
"step": 4971
|
30294 |
+
},
|
30295 |
+
{
|
30296 |
+
"epoch": 0.04,
|
30297 |
+
"learning_rate": 0.0004,
|
30298 |
+
"loss": 8.5725,
|
30299 |
+
"step": 4972
|
30300 |
+
},
|
30301 |
+
{
|
30302 |
+
"epoch": 0.04,
|
30303 |
+
"learning_rate": 0.0004,
|
30304 |
+
"loss": 6.3248,
|
30305 |
+
"step": 4973
|
30306 |
+
},
|
30307 |
+
{
|
30308 |
+
"epoch": 0.04,
|
30309 |
+
"learning_rate": 0.0004,
|
30310 |
+
"loss": 2.9984,
|
30311 |
+
"step": 4974
|
30312 |
+
},
|
30313 |
+
{
|
30314 |
+
"epoch": 0.04,
|
30315 |
+
"learning_rate": 0.0004,
|
30316 |
+
"loss": 5.7955,
|
30317 |
+
"step": 4975
|
30318 |
+
},
|
30319 |
+
{
|
30320 |
+
"epoch": 0.04,
|
30321 |
+
"learning_rate": 0.0004,
|
30322 |
+
"loss": 5.4351,
|
30323 |
+
"step": 4976
|
30324 |
+
},
|
30325 |
+
{
|
30326 |
+
"epoch": 0.04,
|
30327 |
+
"learning_rate": 0.0004,
|
30328 |
+
"loss": 3.5412,
|
30329 |
+
"step": 4977
|
30330 |
+
},
|
30331 |
+
{
|
30332 |
+
"epoch": 0.04,
|
30333 |
+
"learning_rate": 0.0004,
|
30334 |
+
"loss": 9.4986,
|
30335 |
+
"step": 4978
|
30336 |
+
},
|
30337 |
+
{
|
30338 |
+
"epoch": 0.04,
|
30339 |
+
"learning_rate": 0.0004,
|
30340 |
+
"loss": 3.4686,
|
30341 |
+
"step": 4979
|
30342 |
+
},
|
30343 |
+
{
|
30344 |
+
"epoch": 0.04,
|
30345 |
+
"learning_rate": 0.0004,
|
30346 |
+
"loss": 6.3709,
|
30347 |
+
"step": 4980
|
30348 |
+
},
|
30349 |
+
{
|
30350 |
+
"epoch": 0.04,
|
30351 |
+
"learning_rate": 0.0004,
|
30352 |
+
"loss": 3.4326,
|
30353 |
+
"step": 4981
|
30354 |
+
},
|
30355 |
+
{
|
30356 |
+
"epoch": 0.04,
|
30357 |
+
"learning_rate": 0.0004,
|
30358 |
+
"loss": 5.3118,
|
30359 |
+
"step": 4982
|
30360 |
+
},
|
30361 |
+
{
|
30362 |
+
"epoch": 0.04,
|
30363 |
+
"learning_rate": 0.0004,
|
30364 |
+
"loss": 6.2933,
|
30365 |
+
"step": 4983
|
30366 |
+
},
|
30367 |
+
{
|
30368 |
+
"epoch": 0.04,
|
30369 |
+
"learning_rate": 0.0004,
|
30370 |
+
"loss": 7.2728,
|
30371 |
+
"step": 4984
|
30372 |
+
},
|
30373 |
+
{
|
30374 |
+
"epoch": 0.04,
|
30375 |
+
"learning_rate": 0.0004,
|
30376 |
+
"loss": 5.5518,
|
30377 |
+
"step": 4985
|
30378 |
+
},
|
30379 |
+
{
|
30380 |
+
"epoch": 0.04,
|
30381 |
+
"learning_rate": 0.0004,
|
30382 |
+
"loss": 5.8085,
|
30383 |
+
"step": 4986
|
30384 |
+
},
|
30385 |
+
{
|
30386 |
+
"epoch": 0.04,
|
30387 |
+
"learning_rate": 0.0004,
|
30388 |
+
"loss": 6.024,
|
30389 |
+
"step": 4987
|
30390 |
+
},
|
30391 |
+
{
|
30392 |
+
"epoch": 0.04,
|
30393 |
+
"learning_rate": 0.0004,
|
30394 |
+
"loss": 6.7633,
|
30395 |
+
"step": 4988
|
30396 |
+
},
|
30397 |
+
{
|
30398 |
+
"epoch": 0.04,
|
30399 |
+
"learning_rate": 0.0004,
|
30400 |
+
"loss": 3.9099,
|
30401 |
+
"step": 4989
|
30402 |
+
},
|
30403 |
+
{
|
30404 |
+
"epoch": 0.04,
|
30405 |
+
"learning_rate": 0.0004,
|
30406 |
+
"loss": 3.0304,
|
30407 |
+
"step": 4990
|
30408 |
+
},
|
30409 |
+
{
|
30410 |
+
"epoch": 0.04,
|
30411 |
+
"learning_rate": 0.0004,
|
30412 |
+
"loss": 5.939,
|
30413 |
+
"step": 4991
|
30414 |
+
},
|
30415 |
+
{
|
30416 |
+
"epoch": 0.04,
|
30417 |
+
"learning_rate": 0.0004,
|
30418 |
+
"loss": 3.1024,
|
30419 |
+
"step": 4992
|
30420 |
+
},
|
30421 |
+
{
|
30422 |
+
"epoch": 0.04,
|
30423 |
+
"learning_rate": 0.0004,
|
30424 |
+
"loss": 3.2432,
|
30425 |
+
"step": 4993
|
30426 |
+
},
|
30427 |
+
{
|
30428 |
+
"epoch": 0.04,
|
30429 |
+
"learning_rate": 0.0004,
|
30430 |
+
"loss": 6.9213,
|
30431 |
+
"step": 4994
|
30432 |
+
},
|
30433 |
+
{
|
30434 |
+
"epoch": 0.04,
|
30435 |
+
"learning_rate": 0.0004,
|
30436 |
+
"loss": 4.6644,
|
30437 |
+
"step": 4995
|
30438 |
+
},
|
30439 |
+
{
|
30440 |
+
"epoch": 0.04,
|
30441 |
+
"learning_rate": 0.0004,
|
30442 |
+
"loss": 5.9821,
|
30443 |
+
"step": 4996
|
30444 |
+
},
|
30445 |
+
{
|
30446 |
+
"epoch": 0.04,
|
30447 |
+
"learning_rate": 0.0004,
|
30448 |
+
"loss": 4.9677,
|
30449 |
+
"step": 4997
|
30450 |
+
},
|
30451 |
+
{
|
30452 |
+
"epoch": 0.04,
|
30453 |
+
"learning_rate": 0.0004,
|
30454 |
+
"loss": 6.7992,
|
30455 |
+
"step": 4998
|
30456 |
+
},
|
30457 |
+
{
|
30458 |
+
"epoch": 0.04,
|
30459 |
+
"learning_rate": 0.0004,
|
30460 |
+
"loss": 3.2743,
|
30461 |
+
"step": 4999
|
30462 |
+
},
|
30463 |
+
{
|
30464 |
+
"epoch": 0.04,
|
30465 |
+
"learning_rate": 0.0004,
|
30466 |
+
"loss": 4.5054,
|
30467 |
+
"step": 5000
|
30468 |
+
},
|
30469 |
+
{
|
30470 |
+
"epoch": 0.04,
|
30471 |
+
"eval_loss": 6.463876247406006,
|
30472 |
+
"eval_runtime": 22.4171,
|
30473 |
+
"eval_samples_per_second": 2.23,
|
30474 |
+
"eval_steps_per_second": 1.115,
|
30475 |
+
"step": 5000
|
30476 |
+
},
|
30477 |
+
{
|
30478 |
+
"epoch": 0.04,
|
30479 |
+
"mmlu_eval_accuracy": 0.2525477994227994,
|
30480 |
+
"mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
|
30481 |
+
"mmlu_eval_accuracy_anatomy": 0.07142857142857142,
|
30482 |
+
"mmlu_eval_accuracy_astronomy": 0.3125,
|
30483 |
+
"mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
|
30484 |
+
"mmlu_loss": 3.4964506435394287,
|
30485 |
+
"step": 5000
|
30486 |
+
},
|
30487 |
+
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.8989, "step": 5001},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.045, "step": 5002},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.2012, "step": 5003},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.5305, "step": 5004},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.2522, "step": 5005},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.5176, "step": 5006},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9333, "step": 5007},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.6115, "step": 5008},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.2527, "step": 5009},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3046, "step": 5010},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2746, "step": 5011},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.8078, "step": 5012},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.0053, "step": 5013},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.3415, "step": 5014},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1199, "step": 5015},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.6292, "step": 5016},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.5195, "step": 5017},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.2367, "step": 5018},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.219, "step": 5019},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.4376, "step": 5020},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.1413, "step": 5021},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.7102, "step": 5022},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3297, "step": 5023},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.9923, "step": 5024},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0071, "step": 5025},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.351, "step": 5026},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.824, "step": 5027},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.5031, "step": 5028},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7362, "step": 5029},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.5291, "step": 5030},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2754, "step": 5031},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.1831, "step": 5032},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1838, "step": 5033},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.2428, "step": 5034},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0482, "step": 5035},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.2135, "step": 5036},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2257, "step": 5037},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.7514, "step": 5038},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.9855, "step": 5039},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.9738, "step": 5040},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3993, "step": 5041},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.35, "step": 5042},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1362, "step": 5043},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.2576, "step": 5044},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.0811, "step": 5045},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.1524, "step": 5046},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3487, "step": 5047},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.6397, "step": 5048},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.5974, "step": 5049},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3061, "step": 5050},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1425, "step": 5051},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7089, "step": 5052},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.2659, "step": 5053},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.8699, "step": 5054},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.561, "step": 5055},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.8095, "step": 5056},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.5513, "step": 5057},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.8696, "step": 5058},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.397, "step": 5059},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3762, "step": 5060},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1008, "step": 5061},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.4717, "step": 5062},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.8093, "step": 5063},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.9448, "step": 5064},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.4344, "step": 5065},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.6244, "step": 5066},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.1716, "step": 5067},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.664, "step": 5068},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.798, "step": 5069},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.5492, "step": 5070},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.4213, "step": 5071},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.4969, "step": 5072},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.6795, "step": 5073},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.4509, "step": 5074},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.7005, "step": 5075},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0754, "step": 5076},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0044, "step": 5077},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.8357, "step": 5078},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.0447, "step": 5079},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0046, "step": 5080},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3184, "step": 5081},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 9.1669, "step": 5082},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.7666, "step": 5083},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.6633, "step": 5084},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.5884, "step": 5085},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.572, "step": 5086},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.7835, "step": 5087},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 9.58, "step": 5088},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 1.8841, "step": 5089},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 1.8848, "step": 5090},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.2856, "step": 5091},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.4633, "step": 5092},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.082, "step": 5093},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.8496, "step": 5094},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2212, "step": 5095},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.5972, "step": 5096},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9355, "step": 5097},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.5645, "step": 5098},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.944, "step": 5099},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 1.7678, "step": 5100},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.294, "step": 5101},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.9452, "step": 5102},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.2751, "step": 5103},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.2882, "step": 5104},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.3101, "step": 5105},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1729, "step": 5106},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.7164, "step": 5107},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9774, "step": 5108},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.5206, "step": 5109},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.961, "step": 5110},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.5247, "step": 5111},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9292, "step": 5112},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3455, "step": 5113},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.483, "step": 5114},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.5356, "step": 5115},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.7652, "step": 5116},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.6761, "step": 5117},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3517, "step": 5118},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.6645, "step": 5119},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1988, "step": 5120},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.292, "step": 5121},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7002, "step": 5122},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.1236, "step": 5123},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.2389, "step": 5124},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.5483, "step": 5125},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.6833, "step": 5126},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1315, "step": 5127},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.8323, "step": 5128},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.7692, "step": 5129},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.637, "step": 5130},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.0574, "step": 5131},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.1885, "step": 5132},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.2203, "step": 5133},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0885, "step": 5134},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.2705, "step": 5135},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3963, "step": 5136},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9949, "step": 5137},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.7339, "step": 5138},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.041, "step": 5139},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3292, "step": 5140},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.7841, "step": 5141},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.5847, "step": 5142},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3698, "step": 5143},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2052, "step": 5144},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.8951, "step": 5145},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.9602, "step": 5146},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.2329, "step": 5147},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.0463, "step": 5148},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.4032, "step": 5149},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3965, "step": 5150},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.4988, "step": 5151},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.2164, "step": 5152},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1181, "step": 5153},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.6035, "step": 5154},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.8853, "step": 5155},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.9761, "step": 5156},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.6907, "step": 5157},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.5017, "step": 5158},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.3164, "step": 5159},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.7913, "step": 5160},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.4106, "step": 5161},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.4332, "step": 5162},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.8098, "step": 5163},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2833, "step": 5164},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7168, "step": 5165},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.851, "step": 5166},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.8072, "step": 5167},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9745, "step": 5168},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.5735, "step": 5169},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.771, "step": 5170},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9335, "step": 5171},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.856, "step": 5172},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.4643, "step": 5173},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.2565, "step": 5174},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.0499, "step": 5175},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.8558, "step": 5176},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.99, "step": 5177},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.7183, "step": 5178},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.2353, "step": 5179},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1037, "step": 5180},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.4882, "step": 5181},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2169, "step": 5182},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.01, "step": 5183},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.2869, "step": 5184},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.5804, "step": 5185},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.3033, "step": 5186},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.1612, "step": 5187},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9619, "step": 5188},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.3567, "step": 5189},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.2493, "step": 5190},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3546, "step": 5191},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.3477, "step": 5192},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.7189, "step": 5193},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.956, "step": 5194},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.3166, "step": 5195},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.0115, "step": 5196},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3418, "step": 5197},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.1107, "step": 5198},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.1123, "step": 5199},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.6152, "step": 5200},
+    {"epoch": 0.04, "eval_loss": 6.378727912902832, "eval_runtime": 22.4659, "eval_samples_per_second": 2.226, "eval_steps_per_second": 1.113, "step": 5200},
+    {"epoch": 0.04, "mmlu_eval_accuracy": 0.32602813852813856, "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365, "mmlu_eval_accuracy_anatomy": 0.35714285714285715, "mmlu_eval_accuracy_astronomy": 0.25, "mmlu_eval_accuracy_business_ethics": 0.3333333333333333, "mmlu_loss": 3.6307257652282714, "step": 5200},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.3892, "step": 5201},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.2349, "step": 5202},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.5333, "step": 5203},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.145, "step": 5204},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.4543, "step": 5205},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.7512, "step": 5206},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.44, "step": 5207},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.7399, "step": 5208},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.133, "step": 5209},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.1594, "step": 5210},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.6307, "step": 5211},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.4796, "step": 5212},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.2767, "step": 5213},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.9843, "step": 5214},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.0799, "step": 5215},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1612, "step": 5216},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.2503, "step": 5217},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.4808, "step": 5218},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0658, "step": 5219},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.8345, "step": 5220},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.2569, "step": 5221},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.3815, "step": 5222},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.381, "step": 5223},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.2786, "step": 5224},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1709, "step": 5225},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.5537, "step": 5226},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.498, "step": 5227},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9914, "step": 5228},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.6207, "step": 5229},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.1154, "step": 5230},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7722, "step": 5231},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0188, "step": 5232},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.133, "step": 5233},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.7835, "step": 5234},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.1327, "step": 5235},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9416, "step": 5236},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1057, "step": 5237},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.5148, "step": 5238},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.8436, "step": 5239},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.763, "step": 5240},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.7498, "step": 5241},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.7862, "step": 5242},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.9326, "step": 5243},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.1277, "step": 5244},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.5697, "step": 5245},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.0789, "step": 5246},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.6917, "step": 5247},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.5857, "step": 5248},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.5904, "step": 5249},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.6245, "step": 5250},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.5449, "step": 5251},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 9.9082, "step": 5252},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.9662, "step": 5253},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.733, "step": 5254},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1067, "step": 5255},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.9188, "step": 5256},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.5555, "step": 5257},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.7376, "step": 5258},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 9.3653, "step": 5259},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.7456, "step": 5260},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.1382, "step": 5261},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.472, "step": 5262},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.0488, "step": 5263},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.4769, "step": 5264},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.5465, "step": 5265},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.8687, "step": 5266},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.535, "step": 5267},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.935, "step": 5268},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2081, "step": 5269},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.5528, "step": 5270},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.2201, "step": 5271},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.3348, "step": 5272},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.8958, "step": 5273},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2687, "step": 5274},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.2481, "step": 5275},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.1491, "step": 5276},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.9855, "step": 5277},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.7815, "step": 5278},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.0858, "step": 5279},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.8508, "step": 5280},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.1981, "step": 5281},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.0052, "step": 5282},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.6153, "step": 5283},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3998, "step": 5284},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.9997, "step": 5285},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.3768, "step": 5286},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3477, "step": 5287},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.5558, "step": 5288},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.4668, "step": 5289},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.571, "step": 5290},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.8902, "step": 5291},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.3882, "step": 5292},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.6453, "step": 5293},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0346, "step": 5294},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.6004, "step": 5295},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.2908, "step": 5296},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.8072, "step": 5297},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.5529, "step": 5298},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.8983, "step": 5299},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.3365, "step": 5300},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7748, "step": 5301},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1817, "step": 5302},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.1362, "step": 5303},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.4656, "step": 5304},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1376, "step": 5305},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.9722, "step": 5306},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.6619, "step": 5307},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3412, "step": 5308},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.8955, "step": 5309},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.0063, "step": 5310},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.7085, "step": 5311},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3925, "step": 5312},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.8739, "step": 5313},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.2402, "step": 5314},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.3129, "step": 5315},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.4129, "step": 5316},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.722, "step": 5317},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.2092, "step": 5318},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.2358, "step": 5319},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.9978, "step": 5320},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.9746, "step": 5321},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.1538, "step": 5322},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.4998, "step": 5323},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.8847, "step": 5324},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.3631, "step": 5325},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.1982, "step": 5326},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.6708, "step": 5327},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.3822, "step": 5328},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 9.2081, "step": 5329},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.4944, "step": 5330},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.5158, "step": 5331},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.287, "step": 5332},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.0359, "step": 5333},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.2941, "step": 5334},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.0545, "step": 5335},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.9831, "step": 5336},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.0593, "step": 5337},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.3721, "step": 5338},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.6548, "step": 5339},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.4947, "step": 5340},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.9871, "step": 5341},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.8511, "step": 5342},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.0414, "step": 5343},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.5975, "step": 5344},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.9401, "step": 5345},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.1735, "step": 5346},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.225, "step": 5347},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.3719, "step": 5348},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.0078, "step": 5349},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.067, "step": 5350},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.2358, "step": 5351},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7797, "step": 5352},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3039, "step": 5353},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7088, "step": 5354},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.8908, "step": 5355},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.053, "step": 5356},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1785, "step": 5357},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.9685, "step": 5358},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.5938, "step": 5359},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.55, "step": 5360},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.9066, "step": 5361},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1162, "step": 5362},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.3855, "step": 5363},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.3739, "step": 5364},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.6735, "step": 5365},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.4358, "step": 5366},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.0145, "step": 5367},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.3903, "step": 5368},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.7513, "step": 5369},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.3265, "step": 5370},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.0655, "step": 5371},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.046, "step": 5372},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.9508, "step": 5373},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.7946, "step": 5374},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.4541, "step": 5375},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.6098, "step": 5376},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.124, "step": 5377},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.539, "step": 5378},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.8777, "step": 5379},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.6315, "step": 5380},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.1006, "step": 5381},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.1972, "step": 5382},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 8.1427, "step": 5383},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 6.1585, "step": 5384},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.6632, "step": 5385},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.2398, "step": 5386},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.3961, "step": 5387},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.7233, "step": 5388},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.0959, "step": 5389},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 5.5001, "step": 5390},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.0294, "step": 5391},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.0155, "step": 5392},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.2199, "step": 5393},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.3572, "step": 5394},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.7692, "step": 5395},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 7.6206, "step": 5396},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.5941, "step": 5397},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 3.6666, "step": 5398},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 2.2717, "step": 5399},
+    {"epoch": 0.04, "learning_rate": 0.0004, "loss": 4.9048, "step": 5400},
+    {"epoch": 0.04, "eval_loss": 6.526280403137207, "eval_runtime": 22.3472, "eval_samples_per_second": 2.237, "eval_steps_per_second": 1.119, "step": 5400},
+    {"epoch": 0.04, "mmlu_eval_accuracy": 0.2525477994227994, "mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182, "mmlu_eval_accuracy_anatomy": 0.07142857142857142, "mmlu_eval_accuracy_astronomy": 0.3125, "mmlu_eval_accuracy_business_ethics": 0.4444444444444444, "mmlu_loss": 3.8057762241363524, "step": 5400},
+    {"epoch": 0.04, "step": 5400, "total_flos": 8.918950910784307e+16, "train_loss": 0.6445872698006807, "train_runtime": 1748.3273, "train_samples_per_second": 17.159, "train_steps_per_second": 17.159}
   ],
   "max_steps": 30000,
   "num_train_epochs": 1,
+  "total_flos": 8.918950910784307e+16,
   "trial_name": null,
   "trial_params": null
 }
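The block above is the appended `log_history` slice of `trainer_state.json`, so the run can be inspected offline instead of through the diff viewer. A minimal sketch, assuming a local copy of `checkpoint-5600/trainer_state.json` (the path is illustrative, not part of this commit):

```python
import json

# Load the checkpoint's trainer state (local path assumed).
with open("checkpoint-5600/trainer_state.json") as f:
    state = json.load(f)

logs = state["log_history"]
train_logs = [e for e in logs if "loss" in e]      # per-step training records
eval_logs = [e for e in logs if "eval_loss" in e]  # periodic eval records

mean_loss = sum(e["loss"] for e in train_logs) / len(train_logs)
print(f"{len(train_logs)} train records, mean logged loss {mean_loss:.4f}")
for e in eval_logs:
    print(f"step {e['step']}: eval_loss {e['eval_loss']:.4f}")
```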
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fe7d931ebfbcece1009124b9eae98d1a465edd703240c0655ee9bb17db395973
 size 6011
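The `training_args.bin` change above is a plain Git LFS pointer update: the payload stays 6011 bytes and only the content hash changes. A downloaded copy can be checked against the pointer with a short sketch (the local filename is assumed):

```python
import hashlib
from pathlib import Path

# Local copy of the file fetched via LFS; the path is illustrative.
data = Path("training_args.bin").read_bytes()

print("size", len(data))  # the pointer says 6011
print("oid sha256:", hashlib.sha256(data).hexdigest())
# should match fe7d931ebfbcece1009124b9eae98d1a465edd703240c0655ee9bb17db395973
```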